162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Virtio-mem device driver. 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Copyright Red Hat, Inc. 2020 662306a36Sopenharmony_ci * 762306a36Sopenharmony_ci * Author(s): David Hildenbrand <david@redhat.com> 862306a36Sopenharmony_ci */ 962306a36Sopenharmony_ci 1062306a36Sopenharmony_ci#include <linux/virtio.h> 1162306a36Sopenharmony_ci#include <linux/virtio_mem.h> 1262306a36Sopenharmony_ci#include <linux/workqueue.h> 1362306a36Sopenharmony_ci#include <linux/slab.h> 1462306a36Sopenharmony_ci#include <linux/module.h> 1562306a36Sopenharmony_ci#include <linux/mm.h> 1662306a36Sopenharmony_ci#include <linux/memory_hotplug.h> 1762306a36Sopenharmony_ci#include <linux/memory.h> 1862306a36Sopenharmony_ci#include <linux/hrtimer.h> 1962306a36Sopenharmony_ci#include <linux/crash_dump.h> 2062306a36Sopenharmony_ci#include <linux/mutex.h> 2162306a36Sopenharmony_ci#include <linux/bitmap.h> 2262306a36Sopenharmony_ci#include <linux/lockdep.h> 2362306a36Sopenharmony_ci#include <linux/log2.h> 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_ci#include <acpi/acpi_numa.h> 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_cistatic bool unplug_online = true; 2862306a36Sopenharmony_cimodule_param(unplug_online, bool, 0644); 2962306a36Sopenharmony_ciMODULE_PARM_DESC(unplug_online, "Try to unplug online memory"); 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_cistatic bool force_bbm; 3262306a36Sopenharmony_cimodule_param(force_bbm, bool, 0444); 3362306a36Sopenharmony_ciMODULE_PARM_DESC(force_bbm, 3462306a36Sopenharmony_ci "Force Big Block Mode. Default is 0 (auto-selection)"); 3562306a36Sopenharmony_ci 3662306a36Sopenharmony_cistatic unsigned long bbm_block_size; 3762306a36Sopenharmony_cimodule_param(bbm_block_size, ulong, 0444); 3862306a36Sopenharmony_ciMODULE_PARM_DESC(bbm_block_size, 3962306a36Sopenharmony_ci "Big Block size in bytes. Default is 0 (auto-detection)."); 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_ci/* 4262306a36Sopenharmony_ci * virtio-mem currently supports the following modes of operation: 4362306a36Sopenharmony_ci * 4462306a36Sopenharmony_ci * * Sub Block Mode (SBM): A Linux memory block spans 2..X subblocks (SB). The 4562306a36Sopenharmony_ci * size of a Sub Block (SB) is determined based on the device block size, the 4662306a36Sopenharmony_ci * pageblock size, and the maximum allocation granularity of the buddy. 4762306a36Sopenharmony_ci * Subblocks within a Linux memory block might either be plugged or unplugged. 4862306a36Sopenharmony_ci * Memory is added/removed to Linux MM in Linux memory block granularity. 4962306a36Sopenharmony_ci * 5062306a36Sopenharmony_ci * * Big Block Mode (BBM): A Big Block (BB) spans 1..X Linux memory blocks. 5162306a36Sopenharmony_ci * Memory is added/removed to Linux MM in Big Block granularity. 5262306a36Sopenharmony_ci * 5362306a36Sopenharmony_ci * The mode is determined automatically based on the Linux memory block size 5462306a36Sopenharmony_ci * and the device block size. 5562306a36Sopenharmony_ci * 5662306a36Sopenharmony_ci * User space / core MM (auto onlining) is responsible for onlining added 5762306a36Sopenharmony_ci * Linux memory blocks - and for selecting a zone. Linux Memory Blocks are 5862306a36Sopenharmony_ci * always onlined separately, and all memory within a Linux memory block is 5962306a36Sopenharmony_ci * onlined to the same zone - virtio-mem relies on this behavior. 6062306a36Sopenharmony_ci */ 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_ci/* 6362306a36Sopenharmony_ci * State of a Linux memory block in SBM. 6462306a36Sopenharmony_ci */ 6562306a36Sopenharmony_cienum virtio_mem_sbm_mb_state { 6662306a36Sopenharmony_ci /* Unplugged, not added to Linux. Can be reused later. */ 6762306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_UNUSED = 0, 6862306a36Sopenharmony_ci /* (Partially) plugged, not added to Linux. Error on add_memory(). */ 6962306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_PLUGGED, 7062306a36Sopenharmony_ci /* Fully plugged, fully added to Linux, offline. */ 7162306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_OFFLINE, 7262306a36Sopenharmony_ci /* Partially plugged, fully added to Linux, offline. */ 7362306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL, 7462306a36Sopenharmony_ci /* Fully plugged, fully added to Linux, onlined to a kernel zone. */ 7562306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_KERNEL, 7662306a36Sopenharmony_ci /* Partially plugged, fully added to Linux, online to a kernel zone */ 7762306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL, 7862306a36Sopenharmony_ci /* Fully plugged, fully added to Linux, onlined to ZONE_MOVABLE. */ 7962306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_MOVABLE, 8062306a36Sopenharmony_ci /* Partially plugged, fully added to Linux, onlined to ZONE_MOVABLE. */ 8162306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL, 8262306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_COUNT 8362306a36Sopenharmony_ci}; 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci/* 8662306a36Sopenharmony_ci * State of a Big Block (BB) in BBM, covering 1..X Linux memory blocks. 8762306a36Sopenharmony_ci */ 8862306a36Sopenharmony_cienum virtio_mem_bbm_bb_state { 8962306a36Sopenharmony_ci /* Unplugged, not added to Linux. Can be reused later. */ 9062306a36Sopenharmony_ci VIRTIO_MEM_BBM_BB_UNUSED = 0, 9162306a36Sopenharmony_ci /* Plugged, not added to Linux. Error on add_memory(). */ 9262306a36Sopenharmony_ci VIRTIO_MEM_BBM_BB_PLUGGED, 9362306a36Sopenharmony_ci /* Plugged and added to Linux. */ 9462306a36Sopenharmony_ci VIRTIO_MEM_BBM_BB_ADDED, 9562306a36Sopenharmony_ci /* All online parts are fake-offline, ready to remove. */ 9662306a36Sopenharmony_ci VIRTIO_MEM_BBM_BB_FAKE_OFFLINE, 9762306a36Sopenharmony_ci VIRTIO_MEM_BBM_BB_COUNT 9862306a36Sopenharmony_ci}; 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_cistruct virtio_mem { 10162306a36Sopenharmony_ci struct virtio_device *vdev; 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ci /* We might first have to unplug all memory when starting up. */ 10462306a36Sopenharmony_ci bool unplug_all_required; 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_ci /* Workqueue that processes the plug/unplug requests. */ 10762306a36Sopenharmony_ci struct work_struct wq; 10862306a36Sopenharmony_ci atomic_t wq_active; 10962306a36Sopenharmony_ci atomic_t config_changed; 11062306a36Sopenharmony_ci 11162306a36Sopenharmony_ci /* Virtqueue for guest->host requests. */ 11262306a36Sopenharmony_ci struct virtqueue *vq; 11362306a36Sopenharmony_ci 11462306a36Sopenharmony_ci /* Wait for a host response to a guest request. */ 11562306a36Sopenharmony_ci wait_queue_head_t host_resp; 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci /* Space for one guest request and the host response. */ 11862306a36Sopenharmony_ci struct virtio_mem_req req; 11962306a36Sopenharmony_ci struct virtio_mem_resp resp; 12062306a36Sopenharmony_ci 12162306a36Sopenharmony_ci /* The current size of the device. */ 12262306a36Sopenharmony_ci uint64_t plugged_size; 12362306a36Sopenharmony_ci /* The requested size of the device. */ 12462306a36Sopenharmony_ci uint64_t requested_size; 12562306a36Sopenharmony_ci 12662306a36Sopenharmony_ci /* The device block size (for communicating with the device). */ 12762306a36Sopenharmony_ci uint64_t device_block_size; 12862306a36Sopenharmony_ci /* The determined node id for all memory of the device. */ 12962306a36Sopenharmony_ci int nid; 13062306a36Sopenharmony_ci /* Physical start address of the memory region. */ 13162306a36Sopenharmony_ci uint64_t addr; 13262306a36Sopenharmony_ci /* Maximum region size in bytes. */ 13362306a36Sopenharmony_ci uint64_t region_size; 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_ci /* The parent resource for all memory added via this device. */ 13662306a36Sopenharmony_ci struct resource *parent_resource; 13762306a36Sopenharmony_ci /* 13862306a36Sopenharmony_ci * Copy of "System RAM (virtio_mem)" to be used for 13962306a36Sopenharmony_ci * add_memory_driver_managed(). 14062306a36Sopenharmony_ci */ 14162306a36Sopenharmony_ci const char *resource_name; 14262306a36Sopenharmony_ci /* Memory group identification. */ 14362306a36Sopenharmony_ci int mgid; 14462306a36Sopenharmony_ci 14562306a36Sopenharmony_ci /* 14662306a36Sopenharmony_ci * We don't want to add too much memory if it's not getting onlined, 14762306a36Sopenharmony_ci * to avoid running OOM. Besides this threshold, we allow to have at 14862306a36Sopenharmony_ci * least two offline blocks at a time (whatever is bigger). 14962306a36Sopenharmony_ci */ 15062306a36Sopenharmony_ci#define VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD (1024 * 1024 * 1024) 15162306a36Sopenharmony_ci atomic64_t offline_size; 15262306a36Sopenharmony_ci uint64_t offline_threshold; 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_ci /* If set, the driver is in SBM, otherwise in BBM. */ 15562306a36Sopenharmony_ci bool in_sbm; 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ci union { 15862306a36Sopenharmony_ci struct { 15962306a36Sopenharmony_ci /* Id of the first memory block of this device. */ 16062306a36Sopenharmony_ci unsigned long first_mb_id; 16162306a36Sopenharmony_ci /* Id of the last usable memory block of this device. */ 16262306a36Sopenharmony_ci unsigned long last_usable_mb_id; 16362306a36Sopenharmony_ci /* Id of the next memory bock to prepare when needed. */ 16462306a36Sopenharmony_ci unsigned long next_mb_id; 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci /* The subblock size. */ 16762306a36Sopenharmony_ci uint64_t sb_size; 16862306a36Sopenharmony_ci /* The number of subblocks per Linux memory block. */ 16962306a36Sopenharmony_ci uint32_t sbs_per_mb; 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_ci /* 17262306a36Sopenharmony_ci * Some of the Linux memory blocks tracked as "partially 17362306a36Sopenharmony_ci * plugged" are completely unplugged and can be offlined 17462306a36Sopenharmony_ci * and removed -- which previously failed. 17562306a36Sopenharmony_ci */ 17662306a36Sopenharmony_ci bool have_unplugged_mb; 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci /* Summary of all memory block states. */ 17962306a36Sopenharmony_ci unsigned long mb_count[VIRTIO_MEM_SBM_MB_COUNT]; 18062306a36Sopenharmony_ci 18162306a36Sopenharmony_ci /* 18262306a36Sopenharmony_ci * One byte state per memory block. Allocated via 18362306a36Sopenharmony_ci * vmalloc(). Resized (alloc+copy+free) on demand. 18462306a36Sopenharmony_ci * 18562306a36Sopenharmony_ci * With 128 MiB memory blocks, we have states for 512 18662306a36Sopenharmony_ci * GiB of memory in one 4 KiB page. 18762306a36Sopenharmony_ci */ 18862306a36Sopenharmony_ci uint8_t *mb_states; 18962306a36Sopenharmony_ci 19062306a36Sopenharmony_ci /* 19162306a36Sopenharmony_ci * Bitmap: one bit per subblock. Allocated similar to 19262306a36Sopenharmony_ci * sbm.mb_states. 19362306a36Sopenharmony_ci * 19462306a36Sopenharmony_ci * A set bit means the corresponding subblock is 19562306a36Sopenharmony_ci * plugged, otherwise it's unblocked. 19662306a36Sopenharmony_ci * 19762306a36Sopenharmony_ci * With 4 MiB subblocks, we manage 128 GiB of memory 19862306a36Sopenharmony_ci * in one 4 KiB page. 19962306a36Sopenharmony_ci */ 20062306a36Sopenharmony_ci unsigned long *sb_states; 20162306a36Sopenharmony_ci } sbm; 20262306a36Sopenharmony_ci 20362306a36Sopenharmony_ci struct { 20462306a36Sopenharmony_ci /* Id of the first big block of this device. */ 20562306a36Sopenharmony_ci unsigned long first_bb_id; 20662306a36Sopenharmony_ci /* Id of the last usable big block of this device. */ 20762306a36Sopenharmony_ci unsigned long last_usable_bb_id; 20862306a36Sopenharmony_ci /* Id of the next device bock to prepare when needed. */ 20962306a36Sopenharmony_ci unsigned long next_bb_id; 21062306a36Sopenharmony_ci 21162306a36Sopenharmony_ci /* Summary of all big block states. */ 21262306a36Sopenharmony_ci unsigned long bb_count[VIRTIO_MEM_BBM_BB_COUNT]; 21362306a36Sopenharmony_ci 21462306a36Sopenharmony_ci /* One byte state per big block. See sbm.mb_states. */ 21562306a36Sopenharmony_ci uint8_t *bb_states; 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci /* The block size used for plugging/adding/removing. */ 21862306a36Sopenharmony_ci uint64_t bb_size; 21962306a36Sopenharmony_ci } bbm; 22062306a36Sopenharmony_ci }; 22162306a36Sopenharmony_ci 22262306a36Sopenharmony_ci /* 22362306a36Sopenharmony_ci * Mutex that protects the sbm.mb_count, sbm.mb_states, 22462306a36Sopenharmony_ci * sbm.sb_states, bbm.bb_count, and bbm.bb_states 22562306a36Sopenharmony_ci * 22662306a36Sopenharmony_ci * When this lock is held the pointers can't change, ONLINE and 22762306a36Sopenharmony_ci * OFFLINE blocks can't change the state and no subblocks will get 22862306a36Sopenharmony_ci * plugged/unplugged. 22962306a36Sopenharmony_ci * 23062306a36Sopenharmony_ci * In kdump mode, used to serialize requests, last_block_addr and 23162306a36Sopenharmony_ci * last_block_plugged. 23262306a36Sopenharmony_ci */ 23362306a36Sopenharmony_ci struct mutex hotplug_mutex; 23462306a36Sopenharmony_ci bool hotplug_active; 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_ci /* An error occurred we cannot handle - stop processing requests. */ 23762306a36Sopenharmony_ci bool broken; 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci /* Cached valued of is_kdump_kernel() when the device was probed. */ 24062306a36Sopenharmony_ci bool in_kdump; 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_ci /* The driver is being removed. */ 24362306a36Sopenharmony_ci spinlock_t removal_lock; 24462306a36Sopenharmony_ci bool removing; 24562306a36Sopenharmony_ci 24662306a36Sopenharmony_ci /* Timer for retrying to plug/unplug memory. */ 24762306a36Sopenharmony_ci struct hrtimer retry_timer; 24862306a36Sopenharmony_ci unsigned int retry_timer_ms; 24962306a36Sopenharmony_ci#define VIRTIO_MEM_RETRY_TIMER_MIN_MS 50000 25062306a36Sopenharmony_ci#define VIRTIO_MEM_RETRY_TIMER_MAX_MS 300000 25162306a36Sopenharmony_ci 25262306a36Sopenharmony_ci /* Memory notifier (online/offline events). */ 25362306a36Sopenharmony_ci struct notifier_block memory_notifier; 25462306a36Sopenharmony_ci 25562306a36Sopenharmony_ci#ifdef CONFIG_PROC_VMCORE 25662306a36Sopenharmony_ci /* vmcore callback for /proc/vmcore handling in kdump mode */ 25762306a36Sopenharmony_ci struct vmcore_cb vmcore_cb; 25862306a36Sopenharmony_ci uint64_t last_block_addr; 25962306a36Sopenharmony_ci bool last_block_plugged; 26062306a36Sopenharmony_ci#endif /* CONFIG_PROC_VMCORE */ 26162306a36Sopenharmony_ci 26262306a36Sopenharmony_ci /* Next device in the list of virtio-mem devices. */ 26362306a36Sopenharmony_ci struct list_head next; 26462306a36Sopenharmony_ci}; 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_ci/* 26762306a36Sopenharmony_ci * We have to share a single online_page callback among all virtio-mem 26862306a36Sopenharmony_ci * devices. We use RCU to iterate the list in the callback. 26962306a36Sopenharmony_ci */ 27062306a36Sopenharmony_cistatic DEFINE_MUTEX(virtio_mem_mutex); 27162306a36Sopenharmony_cistatic LIST_HEAD(virtio_mem_devices); 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_cistatic void virtio_mem_online_page_cb(struct page *page, unsigned int order); 27462306a36Sopenharmony_cistatic void virtio_mem_fake_offline_going_offline(unsigned long pfn, 27562306a36Sopenharmony_ci unsigned long nr_pages); 27662306a36Sopenharmony_cistatic void virtio_mem_fake_offline_cancel_offline(unsigned long pfn, 27762306a36Sopenharmony_ci unsigned long nr_pages); 27862306a36Sopenharmony_cistatic void virtio_mem_retry(struct virtio_mem *vm); 27962306a36Sopenharmony_cistatic int virtio_mem_create_resource(struct virtio_mem *vm); 28062306a36Sopenharmony_cistatic void virtio_mem_delete_resource(struct virtio_mem *vm); 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_ci/* 28362306a36Sopenharmony_ci * Register a virtio-mem device so it will be considered for the online_page 28462306a36Sopenharmony_ci * callback. 28562306a36Sopenharmony_ci */ 28662306a36Sopenharmony_cistatic int register_virtio_mem_device(struct virtio_mem *vm) 28762306a36Sopenharmony_ci{ 28862306a36Sopenharmony_ci int rc = 0; 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_ci /* First device registers the callback. */ 29162306a36Sopenharmony_ci mutex_lock(&virtio_mem_mutex); 29262306a36Sopenharmony_ci if (list_empty(&virtio_mem_devices)) 29362306a36Sopenharmony_ci rc = set_online_page_callback(&virtio_mem_online_page_cb); 29462306a36Sopenharmony_ci if (!rc) 29562306a36Sopenharmony_ci list_add_rcu(&vm->next, &virtio_mem_devices); 29662306a36Sopenharmony_ci mutex_unlock(&virtio_mem_mutex); 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_ci return rc; 29962306a36Sopenharmony_ci} 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci/* 30262306a36Sopenharmony_ci * Unregister a virtio-mem device so it will no longer be considered for the 30362306a36Sopenharmony_ci * online_page callback. 30462306a36Sopenharmony_ci */ 30562306a36Sopenharmony_cistatic void unregister_virtio_mem_device(struct virtio_mem *vm) 30662306a36Sopenharmony_ci{ 30762306a36Sopenharmony_ci /* Last device unregisters the callback. */ 30862306a36Sopenharmony_ci mutex_lock(&virtio_mem_mutex); 30962306a36Sopenharmony_ci list_del_rcu(&vm->next); 31062306a36Sopenharmony_ci if (list_empty(&virtio_mem_devices)) 31162306a36Sopenharmony_ci restore_online_page_callback(&virtio_mem_online_page_cb); 31262306a36Sopenharmony_ci mutex_unlock(&virtio_mem_mutex); 31362306a36Sopenharmony_ci 31462306a36Sopenharmony_ci synchronize_rcu(); 31562306a36Sopenharmony_ci} 31662306a36Sopenharmony_ci 31762306a36Sopenharmony_ci/* 31862306a36Sopenharmony_ci * Calculate the memory block id of a given address. 31962306a36Sopenharmony_ci */ 32062306a36Sopenharmony_cistatic unsigned long virtio_mem_phys_to_mb_id(unsigned long addr) 32162306a36Sopenharmony_ci{ 32262306a36Sopenharmony_ci return addr / memory_block_size_bytes(); 32362306a36Sopenharmony_ci} 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci/* 32662306a36Sopenharmony_ci * Calculate the physical start address of a given memory block id. 32762306a36Sopenharmony_ci */ 32862306a36Sopenharmony_cistatic unsigned long virtio_mem_mb_id_to_phys(unsigned long mb_id) 32962306a36Sopenharmony_ci{ 33062306a36Sopenharmony_ci return mb_id * memory_block_size_bytes(); 33162306a36Sopenharmony_ci} 33262306a36Sopenharmony_ci 33362306a36Sopenharmony_ci/* 33462306a36Sopenharmony_ci * Calculate the big block id of a given address. 33562306a36Sopenharmony_ci */ 33662306a36Sopenharmony_cistatic unsigned long virtio_mem_phys_to_bb_id(struct virtio_mem *vm, 33762306a36Sopenharmony_ci uint64_t addr) 33862306a36Sopenharmony_ci{ 33962306a36Sopenharmony_ci return addr / vm->bbm.bb_size; 34062306a36Sopenharmony_ci} 34162306a36Sopenharmony_ci 34262306a36Sopenharmony_ci/* 34362306a36Sopenharmony_ci * Calculate the physical start address of a given big block id. 34462306a36Sopenharmony_ci */ 34562306a36Sopenharmony_cistatic uint64_t virtio_mem_bb_id_to_phys(struct virtio_mem *vm, 34662306a36Sopenharmony_ci unsigned long bb_id) 34762306a36Sopenharmony_ci{ 34862306a36Sopenharmony_ci return bb_id * vm->bbm.bb_size; 34962306a36Sopenharmony_ci} 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_ci/* 35262306a36Sopenharmony_ci * Calculate the subblock id of a given address. 35362306a36Sopenharmony_ci */ 35462306a36Sopenharmony_cistatic unsigned long virtio_mem_phys_to_sb_id(struct virtio_mem *vm, 35562306a36Sopenharmony_ci unsigned long addr) 35662306a36Sopenharmony_ci{ 35762306a36Sopenharmony_ci const unsigned long mb_id = virtio_mem_phys_to_mb_id(addr); 35862306a36Sopenharmony_ci const unsigned long mb_addr = virtio_mem_mb_id_to_phys(mb_id); 35962306a36Sopenharmony_ci 36062306a36Sopenharmony_ci return (addr - mb_addr) / vm->sbm.sb_size; 36162306a36Sopenharmony_ci} 36262306a36Sopenharmony_ci 36362306a36Sopenharmony_ci/* 36462306a36Sopenharmony_ci * Set the state of a big block, taking care of the state counter. 36562306a36Sopenharmony_ci */ 36662306a36Sopenharmony_cistatic void virtio_mem_bbm_set_bb_state(struct virtio_mem *vm, 36762306a36Sopenharmony_ci unsigned long bb_id, 36862306a36Sopenharmony_ci enum virtio_mem_bbm_bb_state state) 36962306a36Sopenharmony_ci{ 37062306a36Sopenharmony_ci const unsigned long idx = bb_id - vm->bbm.first_bb_id; 37162306a36Sopenharmony_ci enum virtio_mem_bbm_bb_state old_state; 37262306a36Sopenharmony_ci 37362306a36Sopenharmony_ci old_state = vm->bbm.bb_states[idx]; 37462306a36Sopenharmony_ci vm->bbm.bb_states[idx] = state; 37562306a36Sopenharmony_ci 37662306a36Sopenharmony_ci BUG_ON(vm->bbm.bb_count[old_state] == 0); 37762306a36Sopenharmony_ci vm->bbm.bb_count[old_state]--; 37862306a36Sopenharmony_ci vm->bbm.bb_count[state]++; 37962306a36Sopenharmony_ci} 38062306a36Sopenharmony_ci 38162306a36Sopenharmony_ci/* 38262306a36Sopenharmony_ci * Get the state of a big block. 38362306a36Sopenharmony_ci */ 38462306a36Sopenharmony_cistatic enum virtio_mem_bbm_bb_state virtio_mem_bbm_get_bb_state(struct virtio_mem *vm, 38562306a36Sopenharmony_ci unsigned long bb_id) 38662306a36Sopenharmony_ci{ 38762306a36Sopenharmony_ci return vm->bbm.bb_states[bb_id - vm->bbm.first_bb_id]; 38862306a36Sopenharmony_ci} 38962306a36Sopenharmony_ci 39062306a36Sopenharmony_ci/* 39162306a36Sopenharmony_ci * Prepare the big block state array for the next big block. 39262306a36Sopenharmony_ci */ 39362306a36Sopenharmony_cistatic int virtio_mem_bbm_bb_states_prepare_next_bb(struct virtio_mem *vm) 39462306a36Sopenharmony_ci{ 39562306a36Sopenharmony_ci unsigned long old_bytes = vm->bbm.next_bb_id - vm->bbm.first_bb_id; 39662306a36Sopenharmony_ci unsigned long new_bytes = old_bytes + 1; 39762306a36Sopenharmony_ci int old_pages = PFN_UP(old_bytes); 39862306a36Sopenharmony_ci int new_pages = PFN_UP(new_bytes); 39962306a36Sopenharmony_ci uint8_t *new_array; 40062306a36Sopenharmony_ci 40162306a36Sopenharmony_ci if (vm->bbm.bb_states && old_pages == new_pages) 40262306a36Sopenharmony_ci return 0; 40362306a36Sopenharmony_ci 40462306a36Sopenharmony_ci new_array = vzalloc(new_pages * PAGE_SIZE); 40562306a36Sopenharmony_ci if (!new_array) 40662306a36Sopenharmony_ci return -ENOMEM; 40762306a36Sopenharmony_ci 40862306a36Sopenharmony_ci mutex_lock(&vm->hotplug_mutex); 40962306a36Sopenharmony_ci if (vm->bbm.bb_states) 41062306a36Sopenharmony_ci memcpy(new_array, vm->bbm.bb_states, old_pages * PAGE_SIZE); 41162306a36Sopenharmony_ci vfree(vm->bbm.bb_states); 41262306a36Sopenharmony_ci vm->bbm.bb_states = new_array; 41362306a36Sopenharmony_ci mutex_unlock(&vm->hotplug_mutex); 41462306a36Sopenharmony_ci 41562306a36Sopenharmony_ci return 0; 41662306a36Sopenharmony_ci} 41762306a36Sopenharmony_ci 41862306a36Sopenharmony_ci#define virtio_mem_bbm_for_each_bb(_vm, _bb_id, _state) \ 41962306a36Sopenharmony_ci for (_bb_id = vm->bbm.first_bb_id; \ 42062306a36Sopenharmony_ci _bb_id < vm->bbm.next_bb_id && _vm->bbm.bb_count[_state]; \ 42162306a36Sopenharmony_ci _bb_id++) \ 42262306a36Sopenharmony_ci if (virtio_mem_bbm_get_bb_state(_vm, _bb_id) == _state) 42362306a36Sopenharmony_ci 42462306a36Sopenharmony_ci#define virtio_mem_bbm_for_each_bb_rev(_vm, _bb_id, _state) \ 42562306a36Sopenharmony_ci for (_bb_id = vm->bbm.next_bb_id - 1; \ 42662306a36Sopenharmony_ci _bb_id >= vm->bbm.first_bb_id && _vm->bbm.bb_count[_state]; \ 42762306a36Sopenharmony_ci _bb_id--) \ 42862306a36Sopenharmony_ci if (virtio_mem_bbm_get_bb_state(_vm, _bb_id) == _state) 42962306a36Sopenharmony_ci 43062306a36Sopenharmony_ci/* 43162306a36Sopenharmony_ci * Set the state of a memory block, taking care of the state counter. 43262306a36Sopenharmony_ci */ 43362306a36Sopenharmony_cistatic void virtio_mem_sbm_set_mb_state(struct virtio_mem *vm, 43462306a36Sopenharmony_ci unsigned long mb_id, uint8_t state) 43562306a36Sopenharmony_ci{ 43662306a36Sopenharmony_ci const unsigned long idx = mb_id - vm->sbm.first_mb_id; 43762306a36Sopenharmony_ci uint8_t old_state; 43862306a36Sopenharmony_ci 43962306a36Sopenharmony_ci old_state = vm->sbm.mb_states[idx]; 44062306a36Sopenharmony_ci vm->sbm.mb_states[idx] = state; 44162306a36Sopenharmony_ci 44262306a36Sopenharmony_ci BUG_ON(vm->sbm.mb_count[old_state] == 0); 44362306a36Sopenharmony_ci vm->sbm.mb_count[old_state]--; 44462306a36Sopenharmony_ci vm->sbm.mb_count[state]++; 44562306a36Sopenharmony_ci} 44662306a36Sopenharmony_ci 44762306a36Sopenharmony_ci/* 44862306a36Sopenharmony_ci * Get the state of a memory block. 44962306a36Sopenharmony_ci */ 45062306a36Sopenharmony_cistatic uint8_t virtio_mem_sbm_get_mb_state(struct virtio_mem *vm, 45162306a36Sopenharmony_ci unsigned long mb_id) 45262306a36Sopenharmony_ci{ 45362306a36Sopenharmony_ci const unsigned long idx = mb_id - vm->sbm.first_mb_id; 45462306a36Sopenharmony_ci 45562306a36Sopenharmony_ci return vm->sbm.mb_states[idx]; 45662306a36Sopenharmony_ci} 45762306a36Sopenharmony_ci 45862306a36Sopenharmony_ci/* 45962306a36Sopenharmony_ci * Prepare the state array for the next memory block. 46062306a36Sopenharmony_ci */ 46162306a36Sopenharmony_cistatic int virtio_mem_sbm_mb_states_prepare_next_mb(struct virtio_mem *vm) 46262306a36Sopenharmony_ci{ 46362306a36Sopenharmony_ci int old_pages = PFN_UP(vm->sbm.next_mb_id - vm->sbm.first_mb_id); 46462306a36Sopenharmony_ci int new_pages = PFN_UP(vm->sbm.next_mb_id - vm->sbm.first_mb_id + 1); 46562306a36Sopenharmony_ci uint8_t *new_array; 46662306a36Sopenharmony_ci 46762306a36Sopenharmony_ci if (vm->sbm.mb_states && old_pages == new_pages) 46862306a36Sopenharmony_ci return 0; 46962306a36Sopenharmony_ci 47062306a36Sopenharmony_ci new_array = vzalloc(new_pages * PAGE_SIZE); 47162306a36Sopenharmony_ci if (!new_array) 47262306a36Sopenharmony_ci return -ENOMEM; 47362306a36Sopenharmony_ci 47462306a36Sopenharmony_ci mutex_lock(&vm->hotplug_mutex); 47562306a36Sopenharmony_ci if (vm->sbm.mb_states) 47662306a36Sopenharmony_ci memcpy(new_array, vm->sbm.mb_states, old_pages * PAGE_SIZE); 47762306a36Sopenharmony_ci vfree(vm->sbm.mb_states); 47862306a36Sopenharmony_ci vm->sbm.mb_states = new_array; 47962306a36Sopenharmony_ci mutex_unlock(&vm->hotplug_mutex); 48062306a36Sopenharmony_ci 48162306a36Sopenharmony_ci return 0; 48262306a36Sopenharmony_ci} 48362306a36Sopenharmony_ci 48462306a36Sopenharmony_ci#define virtio_mem_sbm_for_each_mb(_vm, _mb_id, _state) \ 48562306a36Sopenharmony_ci for (_mb_id = _vm->sbm.first_mb_id; \ 48662306a36Sopenharmony_ci _mb_id < _vm->sbm.next_mb_id && _vm->sbm.mb_count[_state]; \ 48762306a36Sopenharmony_ci _mb_id++) \ 48862306a36Sopenharmony_ci if (virtio_mem_sbm_get_mb_state(_vm, _mb_id) == _state) 48962306a36Sopenharmony_ci 49062306a36Sopenharmony_ci#define virtio_mem_sbm_for_each_mb_rev(_vm, _mb_id, _state) \ 49162306a36Sopenharmony_ci for (_mb_id = _vm->sbm.next_mb_id - 1; \ 49262306a36Sopenharmony_ci _mb_id >= _vm->sbm.first_mb_id && _vm->sbm.mb_count[_state]; \ 49362306a36Sopenharmony_ci _mb_id--) \ 49462306a36Sopenharmony_ci if (virtio_mem_sbm_get_mb_state(_vm, _mb_id) == _state) 49562306a36Sopenharmony_ci 49662306a36Sopenharmony_ci/* 49762306a36Sopenharmony_ci * Calculate the bit number in the subblock bitmap for the given subblock 49862306a36Sopenharmony_ci * inside the given memory block. 49962306a36Sopenharmony_ci */ 50062306a36Sopenharmony_cistatic int virtio_mem_sbm_sb_state_bit_nr(struct virtio_mem *vm, 50162306a36Sopenharmony_ci unsigned long mb_id, int sb_id) 50262306a36Sopenharmony_ci{ 50362306a36Sopenharmony_ci return (mb_id - vm->sbm.first_mb_id) * vm->sbm.sbs_per_mb + sb_id; 50462306a36Sopenharmony_ci} 50562306a36Sopenharmony_ci 50662306a36Sopenharmony_ci/* 50762306a36Sopenharmony_ci * Mark all selected subblocks plugged. 50862306a36Sopenharmony_ci * 50962306a36Sopenharmony_ci * Will not modify the state of the memory block. 51062306a36Sopenharmony_ci */ 51162306a36Sopenharmony_cistatic void virtio_mem_sbm_set_sb_plugged(struct virtio_mem *vm, 51262306a36Sopenharmony_ci unsigned long mb_id, int sb_id, 51362306a36Sopenharmony_ci int count) 51462306a36Sopenharmony_ci{ 51562306a36Sopenharmony_ci const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id); 51662306a36Sopenharmony_ci 51762306a36Sopenharmony_ci __bitmap_set(vm->sbm.sb_states, bit, count); 51862306a36Sopenharmony_ci} 51962306a36Sopenharmony_ci 52062306a36Sopenharmony_ci/* 52162306a36Sopenharmony_ci * Mark all selected subblocks unplugged. 52262306a36Sopenharmony_ci * 52362306a36Sopenharmony_ci * Will not modify the state of the memory block. 52462306a36Sopenharmony_ci */ 52562306a36Sopenharmony_cistatic void virtio_mem_sbm_set_sb_unplugged(struct virtio_mem *vm, 52662306a36Sopenharmony_ci unsigned long mb_id, int sb_id, 52762306a36Sopenharmony_ci int count) 52862306a36Sopenharmony_ci{ 52962306a36Sopenharmony_ci const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id); 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_ci __bitmap_clear(vm->sbm.sb_states, bit, count); 53262306a36Sopenharmony_ci} 53362306a36Sopenharmony_ci 53462306a36Sopenharmony_ci/* 53562306a36Sopenharmony_ci * Test if all selected subblocks are plugged. 53662306a36Sopenharmony_ci */ 53762306a36Sopenharmony_cistatic bool virtio_mem_sbm_test_sb_plugged(struct virtio_mem *vm, 53862306a36Sopenharmony_ci unsigned long mb_id, int sb_id, 53962306a36Sopenharmony_ci int count) 54062306a36Sopenharmony_ci{ 54162306a36Sopenharmony_ci const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id); 54262306a36Sopenharmony_ci 54362306a36Sopenharmony_ci if (count == 1) 54462306a36Sopenharmony_ci return test_bit(bit, vm->sbm.sb_states); 54562306a36Sopenharmony_ci 54662306a36Sopenharmony_ci /* TODO: Helper similar to bitmap_set() */ 54762306a36Sopenharmony_ci return find_next_zero_bit(vm->sbm.sb_states, bit + count, bit) >= 54862306a36Sopenharmony_ci bit + count; 54962306a36Sopenharmony_ci} 55062306a36Sopenharmony_ci 55162306a36Sopenharmony_ci/* 55262306a36Sopenharmony_ci * Test if all selected subblocks are unplugged. 55362306a36Sopenharmony_ci */ 55462306a36Sopenharmony_cistatic bool virtio_mem_sbm_test_sb_unplugged(struct virtio_mem *vm, 55562306a36Sopenharmony_ci unsigned long mb_id, int sb_id, 55662306a36Sopenharmony_ci int count) 55762306a36Sopenharmony_ci{ 55862306a36Sopenharmony_ci const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id); 55962306a36Sopenharmony_ci 56062306a36Sopenharmony_ci /* TODO: Helper similar to bitmap_set() */ 56162306a36Sopenharmony_ci return find_next_bit(vm->sbm.sb_states, bit + count, bit) >= 56262306a36Sopenharmony_ci bit + count; 56362306a36Sopenharmony_ci} 56462306a36Sopenharmony_ci 56562306a36Sopenharmony_ci/* 56662306a36Sopenharmony_ci * Find the first unplugged subblock. Returns vm->sbm.sbs_per_mb in case there is 56762306a36Sopenharmony_ci * none. 56862306a36Sopenharmony_ci */ 56962306a36Sopenharmony_cistatic int virtio_mem_sbm_first_unplugged_sb(struct virtio_mem *vm, 57062306a36Sopenharmony_ci unsigned long mb_id) 57162306a36Sopenharmony_ci{ 57262306a36Sopenharmony_ci const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, 0); 57362306a36Sopenharmony_ci 57462306a36Sopenharmony_ci return find_next_zero_bit(vm->sbm.sb_states, 57562306a36Sopenharmony_ci bit + vm->sbm.sbs_per_mb, bit) - bit; 57662306a36Sopenharmony_ci} 57762306a36Sopenharmony_ci 57862306a36Sopenharmony_ci/* 57962306a36Sopenharmony_ci * Prepare the subblock bitmap for the next memory block. 58062306a36Sopenharmony_ci */ 58162306a36Sopenharmony_cistatic int virtio_mem_sbm_sb_states_prepare_next_mb(struct virtio_mem *vm) 58262306a36Sopenharmony_ci{ 58362306a36Sopenharmony_ci const unsigned long old_nb_mb = vm->sbm.next_mb_id - vm->sbm.first_mb_id; 58462306a36Sopenharmony_ci const unsigned long old_nb_bits = old_nb_mb * vm->sbm.sbs_per_mb; 58562306a36Sopenharmony_ci const unsigned long new_nb_bits = (old_nb_mb + 1) * vm->sbm.sbs_per_mb; 58662306a36Sopenharmony_ci int old_pages = PFN_UP(BITS_TO_LONGS(old_nb_bits) * sizeof(long)); 58762306a36Sopenharmony_ci int new_pages = PFN_UP(BITS_TO_LONGS(new_nb_bits) * sizeof(long)); 58862306a36Sopenharmony_ci unsigned long *new_bitmap, *old_bitmap; 58962306a36Sopenharmony_ci 59062306a36Sopenharmony_ci if (vm->sbm.sb_states && old_pages == new_pages) 59162306a36Sopenharmony_ci return 0; 59262306a36Sopenharmony_ci 59362306a36Sopenharmony_ci new_bitmap = vzalloc(new_pages * PAGE_SIZE); 59462306a36Sopenharmony_ci if (!new_bitmap) 59562306a36Sopenharmony_ci return -ENOMEM; 59662306a36Sopenharmony_ci 59762306a36Sopenharmony_ci mutex_lock(&vm->hotplug_mutex); 59862306a36Sopenharmony_ci if (vm->sbm.sb_states) 59962306a36Sopenharmony_ci memcpy(new_bitmap, vm->sbm.sb_states, old_pages * PAGE_SIZE); 60062306a36Sopenharmony_ci 60162306a36Sopenharmony_ci old_bitmap = vm->sbm.sb_states; 60262306a36Sopenharmony_ci vm->sbm.sb_states = new_bitmap; 60362306a36Sopenharmony_ci mutex_unlock(&vm->hotplug_mutex); 60462306a36Sopenharmony_ci 60562306a36Sopenharmony_ci vfree(old_bitmap); 60662306a36Sopenharmony_ci return 0; 60762306a36Sopenharmony_ci} 60862306a36Sopenharmony_ci 60962306a36Sopenharmony_ci/* 61062306a36Sopenharmony_ci * Test if we could add memory without creating too much offline memory - 61162306a36Sopenharmony_ci * to avoid running OOM if memory is getting onlined deferred. 61262306a36Sopenharmony_ci */ 61362306a36Sopenharmony_cistatic bool virtio_mem_could_add_memory(struct virtio_mem *vm, uint64_t size) 61462306a36Sopenharmony_ci{ 61562306a36Sopenharmony_ci if (WARN_ON_ONCE(size > vm->offline_threshold)) 61662306a36Sopenharmony_ci return false; 61762306a36Sopenharmony_ci 61862306a36Sopenharmony_ci return atomic64_read(&vm->offline_size) + size <= vm->offline_threshold; 61962306a36Sopenharmony_ci} 62062306a36Sopenharmony_ci 62162306a36Sopenharmony_ci/* 62262306a36Sopenharmony_ci * Try adding memory to Linux. Will usually only fail if out of memory. 62362306a36Sopenharmony_ci * 62462306a36Sopenharmony_ci * Must not be called with the vm->hotplug_mutex held (possible deadlock with 62562306a36Sopenharmony_ci * onlining code). 62662306a36Sopenharmony_ci * 62762306a36Sopenharmony_ci * Will not modify the state of memory blocks in virtio-mem. 62862306a36Sopenharmony_ci */ 62962306a36Sopenharmony_cistatic int virtio_mem_add_memory(struct virtio_mem *vm, uint64_t addr, 63062306a36Sopenharmony_ci uint64_t size) 63162306a36Sopenharmony_ci{ 63262306a36Sopenharmony_ci int rc; 63362306a36Sopenharmony_ci 63462306a36Sopenharmony_ci /* 63562306a36Sopenharmony_ci * When force-unloading the driver and we still have memory added to 63662306a36Sopenharmony_ci * Linux, the resource name has to stay. 63762306a36Sopenharmony_ci */ 63862306a36Sopenharmony_ci if (!vm->resource_name) { 63962306a36Sopenharmony_ci vm->resource_name = kstrdup_const("System RAM (virtio_mem)", 64062306a36Sopenharmony_ci GFP_KERNEL); 64162306a36Sopenharmony_ci if (!vm->resource_name) 64262306a36Sopenharmony_ci return -ENOMEM; 64362306a36Sopenharmony_ci } 64462306a36Sopenharmony_ci 64562306a36Sopenharmony_ci dev_dbg(&vm->vdev->dev, "adding memory: 0x%llx - 0x%llx\n", addr, 64662306a36Sopenharmony_ci addr + size - 1); 64762306a36Sopenharmony_ci /* Memory might get onlined immediately. */ 64862306a36Sopenharmony_ci atomic64_add(size, &vm->offline_size); 64962306a36Sopenharmony_ci rc = add_memory_driver_managed(vm->mgid, addr, size, vm->resource_name, 65062306a36Sopenharmony_ci MHP_MERGE_RESOURCE | MHP_NID_IS_MGID); 65162306a36Sopenharmony_ci if (rc) { 65262306a36Sopenharmony_ci atomic64_sub(size, &vm->offline_size); 65362306a36Sopenharmony_ci dev_warn(&vm->vdev->dev, "adding memory failed: %d\n", rc); 65462306a36Sopenharmony_ci /* 65562306a36Sopenharmony_ci * TODO: Linux MM does not properly clean up yet in all cases 65662306a36Sopenharmony_ci * where adding of memory failed - especially on -ENOMEM. 65762306a36Sopenharmony_ci */ 65862306a36Sopenharmony_ci } 65962306a36Sopenharmony_ci return rc; 66062306a36Sopenharmony_ci} 66162306a36Sopenharmony_ci 66262306a36Sopenharmony_ci/* 66362306a36Sopenharmony_ci * See virtio_mem_add_memory(): Try adding a single Linux memory block. 66462306a36Sopenharmony_ci */ 66562306a36Sopenharmony_cistatic int virtio_mem_sbm_add_mb(struct virtio_mem *vm, unsigned long mb_id) 66662306a36Sopenharmony_ci{ 66762306a36Sopenharmony_ci const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); 66862306a36Sopenharmony_ci const uint64_t size = memory_block_size_bytes(); 66962306a36Sopenharmony_ci 67062306a36Sopenharmony_ci return virtio_mem_add_memory(vm, addr, size); 67162306a36Sopenharmony_ci} 67262306a36Sopenharmony_ci 67362306a36Sopenharmony_ci/* 67462306a36Sopenharmony_ci * See virtio_mem_add_memory(): Try adding a big block. 67562306a36Sopenharmony_ci */ 67662306a36Sopenharmony_cistatic int virtio_mem_bbm_add_bb(struct virtio_mem *vm, unsigned long bb_id) 67762306a36Sopenharmony_ci{ 67862306a36Sopenharmony_ci const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); 67962306a36Sopenharmony_ci const uint64_t size = vm->bbm.bb_size; 68062306a36Sopenharmony_ci 68162306a36Sopenharmony_ci return virtio_mem_add_memory(vm, addr, size); 68262306a36Sopenharmony_ci} 68362306a36Sopenharmony_ci 68462306a36Sopenharmony_ci/* 68562306a36Sopenharmony_ci * Try removing memory from Linux. Will only fail if memory blocks aren't 68662306a36Sopenharmony_ci * offline. 68762306a36Sopenharmony_ci * 68862306a36Sopenharmony_ci * Must not be called with the vm->hotplug_mutex held (possible deadlock with 68962306a36Sopenharmony_ci * onlining code). 69062306a36Sopenharmony_ci * 69162306a36Sopenharmony_ci * Will not modify the state of memory blocks in virtio-mem. 69262306a36Sopenharmony_ci */ 69362306a36Sopenharmony_cistatic int virtio_mem_remove_memory(struct virtio_mem *vm, uint64_t addr, 69462306a36Sopenharmony_ci uint64_t size) 69562306a36Sopenharmony_ci{ 69662306a36Sopenharmony_ci int rc; 69762306a36Sopenharmony_ci 69862306a36Sopenharmony_ci dev_dbg(&vm->vdev->dev, "removing memory: 0x%llx - 0x%llx\n", addr, 69962306a36Sopenharmony_ci addr + size - 1); 70062306a36Sopenharmony_ci rc = remove_memory(addr, size); 70162306a36Sopenharmony_ci if (!rc) { 70262306a36Sopenharmony_ci atomic64_sub(size, &vm->offline_size); 70362306a36Sopenharmony_ci /* 70462306a36Sopenharmony_ci * We might have freed up memory we can now unplug, retry 70562306a36Sopenharmony_ci * immediately instead of waiting. 70662306a36Sopenharmony_ci */ 70762306a36Sopenharmony_ci virtio_mem_retry(vm); 70862306a36Sopenharmony_ci } else { 70962306a36Sopenharmony_ci dev_dbg(&vm->vdev->dev, "removing memory failed: %d\n", rc); 71062306a36Sopenharmony_ci } 71162306a36Sopenharmony_ci return rc; 71262306a36Sopenharmony_ci} 71362306a36Sopenharmony_ci 71462306a36Sopenharmony_ci/* 71562306a36Sopenharmony_ci * See virtio_mem_remove_memory(): Try removing a single Linux memory block. 71662306a36Sopenharmony_ci */ 71762306a36Sopenharmony_cistatic int virtio_mem_sbm_remove_mb(struct virtio_mem *vm, unsigned long mb_id) 71862306a36Sopenharmony_ci{ 71962306a36Sopenharmony_ci const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); 72062306a36Sopenharmony_ci const uint64_t size = memory_block_size_bytes(); 72162306a36Sopenharmony_ci 72262306a36Sopenharmony_ci return virtio_mem_remove_memory(vm, addr, size); 72362306a36Sopenharmony_ci} 72462306a36Sopenharmony_ci 72562306a36Sopenharmony_ci/* 72662306a36Sopenharmony_ci * Try offlining and removing memory from Linux. 72762306a36Sopenharmony_ci * 72862306a36Sopenharmony_ci * Must not be called with the vm->hotplug_mutex held (possible deadlock with 72962306a36Sopenharmony_ci * onlining code). 73062306a36Sopenharmony_ci * 73162306a36Sopenharmony_ci * Will not modify the state of memory blocks in virtio-mem. 73262306a36Sopenharmony_ci */ 73362306a36Sopenharmony_cistatic int virtio_mem_offline_and_remove_memory(struct virtio_mem *vm, 73462306a36Sopenharmony_ci uint64_t addr, 73562306a36Sopenharmony_ci uint64_t size) 73662306a36Sopenharmony_ci{ 73762306a36Sopenharmony_ci int rc; 73862306a36Sopenharmony_ci 73962306a36Sopenharmony_ci dev_dbg(&vm->vdev->dev, 74062306a36Sopenharmony_ci "offlining and removing memory: 0x%llx - 0x%llx\n", addr, 74162306a36Sopenharmony_ci addr + size - 1); 74262306a36Sopenharmony_ci 74362306a36Sopenharmony_ci rc = offline_and_remove_memory(addr, size); 74462306a36Sopenharmony_ci if (!rc) { 74562306a36Sopenharmony_ci atomic64_sub(size, &vm->offline_size); 74662306a36Sopenharmony_ci /* 74762306a36Sopenharmony_ci * We might have freed up memory we can now unplug, retry 74862306a36Sopenharmony_ci * immediately instead of waiting. 74962306a36Sopenharmony_ci */ 75062306a36Sopenharmony_ci virtio_mem_retry(vm); 75162306a36Sopenharmony_ci return 0; 75262306a36Sopenharmony_ci } 75362306a36Sopenharmony_ci dev_dbg(&vm->vdev->dev, "offlining and removing memory failed: %d\n", rc); 75462306a36Sopenharmony_ci /* 75562306a36Sopenharmony_ci * We don't really expect this to fail, because we fake-offlined all 75662306a36Sopenharmony_ci * memory already. But it could fail in corner cases. 75762306a36Sopenharmony_ci */ 75862306a36Sopenharmony_ci WARN_ON_ONCE(rc != -ENOMEM && rc != -EBUSY); 75962306a36Sopenharmony_ci return rc == -ENOMEM ? -ENOMEM : -EBUSY; 76062306a36Sopenharmony_ci} 76162306a36Sopenharmony_ci 76262306a36Sopenharmony_ci/* 76362306a36Sopenharmony_ci * See virtio_mem_offline_and_remove_memory(): Try offlining and removing 76462306a36Sopenharmony_ci * a single Linux memory block. 76562306a36Sopenharmony_ci */ 76662306a36Sopenharmony_cistatic int virtio_mem_sbm_offline_and_remove_mb(struct virtio_mem *vm, 76762306a36Sopenharmony_ci unsigned long mb_id) 76862306a36Sopenharmony_ci{ 76962306a36Sopenharmony_ci const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); 77062306a36Sopenharmony_ci const uint64_t size = memory_block_size_bytes(); 77162306a36Sopenharmony_ci 77262306a36Sopenharmony_ci return virtio_mem_offline_and_remove_memory(vm, addr, size); 77362306a36Sopenharmony_ci} 77462306a36Sopenharmony_ci 77562306a36Sopenharmony_ci/* 77662306a36Sopenharmony_ci * Try (offlining and) removing memory from Linux in case all subblocks are 77762306a36Sopenharmony_ci * unplugged. Can be called on online and offline memory blocks. 77862306a36Sopenharmony_ci * 77962306a36Sopenharmony_ci * May modify the state of memory blocks in virtio-mem. 78062306a36Sopenharmony_ci */ 78162306a36Sopenharmony_cistatic int virtio_mem_sbm_try_remove_unplugged_mb(struct virtio_mem *vm, 78262306a36Sopenharmony_ci unsigned long mb_id) 78362306a36Sopenharmony_ci{ 78462306a36Sopenharmony_ci int rc; 78562306a36Sopenharmony_ci 78662306a36Sopenharmony_ci /* 78762306a36Sopenharmony_ci * Once all subblocks of a memory block were unplugged, offline and 78862306a36Sopenharmony_ci * remove it. 78962306a36Sopenharmony_ci */ 79062306a36Sopenharmony_ci if (!virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) 79162306a36Sopenharmony_ci return 0; 79262306a36Sopenharmony_ci 79362306a36Sopenharmony_ci /* offline_and_remove_memory() works for online and offline memory. */ 79462306a36Sopenharmony_ci mutex_unlock(&vm->hotplug_mutex); 79562306a36Sopenharmony_ci rc = virtio_mem_sbm_offline_and_remove_mb(vm, mb_id); 79662306a36Sopenharmony_ci mutex_lock(&vm->hotplug_mutex); 79762306a36Sopenharmony_ci if (!rc) 79862306a36Sopenharmony_ci virtio_mem_sbm_set_mb_state(vm, mb_id, 79962306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_UNUSED); 80062306a36Sopenharmony_ci return rc; 80162306a36Sopenharmony_ci} 80262306a36Sopenharmony_ci 80362306a36Sopenharmony_ci/* 80462306a36Sopenharmony_ci * See virtio_mem_offline_and_remove_memory(): Try to offline and remove a 80562306a36Sopenharmony_ci * all Linux memory blocks covered by the big block. 80662306a36Sopenharmony_ci */ 80762306a36Sopenharmony_cistatic int virtio_mem_bbm_offline_and_remove_bb(struct virtio_mem *vm, 80862306a36Sopenharmony_ci unsigned long bb_id) 80962306a36Sopenharmony_ci{ 81062306a36Sopenharmony_ci const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); 81162306a36Sopenharmony_ci const uint64_t size = vm->bbm.bb_size; 81262306a36Sopenharmony_ci 81362306a36Sopenharmony_ci return virtio_mem_offline_and_remove_memory(vm, addr, size); 81462306a36Sopenharmony_ci} 81562306a36Sopenharmony_ci 81662306a36Sopenharmony_ci/* 81762306a36Sopenharmony_ci * Trigger the workqueue so the device can perform its magic. 81862306a36Sopenharmony_ci */ 81962306a36Sopenharmony_cistatic void virtio_mem_retry(struct virtio_mem *vm) 82062306a36Sopenharmony_ci{ 82162306a36Sopenharmony_ci unsigned long flags; 82262306a36Sopenharmony_ci 82362306a36Sopenharmony_ci spin_lock_irqsave(&vm->removal_lock, flags); 82462306a36Sopenharmony_ci if (!vm->removing) 82562306a36Sopenharmony_ci queue_work(system_freezable_wq, &vm->wq); 82662306a36Sopenharmony_ci spin_unlock_irqrestore(&vm->removal_lock, flags); 82762306a36Sopenharmony_ci} 82862306a36Sopenharmony_ci 82962306a36Sopenharmony_cistatic int virtio_mem_translate_node_id(struct virtio_mem *vm, uint16_t node_id) 83062306a36Sopenharmony_ci{ 83162306a36Sopenharmony_ci int node = NUMA_NO_NODE; 83262306a36Sopenharmony_ci 83362306a36Sopenharmony_ci#if defined(CONFIG_ACPI_NUMA) 83462306a36Sopenharmony_ci if (virtio_has_feature(vm->vdev, VIRTIO_MEM_F_ACPI_PXM)) 83562306a36Sopenharmony_ci node = pxm_to_node(node_id); 83662306a36Sopenharmony_ci#endif 83762306a36Sopenharmony_ci return node; 83862306a36Sopenharmony_ci} 83962306a36Sopenharmony_ci 84062306a36Sopenharmony_ci/* 84162306a36Sopenharmony_ci * Test if a virtio-mem device overlaps with the given range. Can be called 84262306a36Sopenharmony_ci * from (notifier) callbacks lockless. 84362306a36Sopenharmony_ci */ 84462306a36Sopenharmony_cistatic bool virtio_mem_overlaps_range(struct virtio_mem *vm, uint64_t start, 84562306a36Sopenharmony_ci uint64_t size) 84662306a36Sopenharmony_ci{ 84762306a36Sopenharmony_ci return start < vm->addr + vm->region_size && vm->addr < start + size; 84862306a36Sopenharmony_ci} 84962306a36Sopenharmony_ci 85062306a36Sopenharmony_ci/* 85162306a36Sopenharmony_ci * Test if a virtio-mem device contains a given range. Can be called from 85262306a36Sopenharmony_ci * (notifier) callbacks lockless. 85362306a36Sopenharmony_ci */ 85462306a36Sopenharmony_cistatic bool virtio_mem_contains_range(struct virtio_mem *vm, uint64_t start, 85562306a36Sopenharmony_ci uint64_t size) 85662306a36Sopenharmony_ci{ 85762306a36Sopenharmony_ci return start >= vm->addr && start + size <= vm->addr + vm->region_size; 85862306a36Sopenharmony_ci} 85962306a36Sopenharmony_ci 86062306a36Sopenharmony_cistatic int virtio_mem_sbm_notify_going_online(struct virtio_mem *vm, 86162306a36Sopenharmony_ci unsigned long mb_id) 86262306a36Sopenharmony_ci{ 86362306a36Sopenharmony_ci switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) { 86462306a36Sopenharmony_ci case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL: 86562306a36Sopenharmony_ci case VIRTIO_MEM_SBM_MB_OFFLINE: 86662306a36Sopenharmony_ci return NOTIFY_OK; 86762306a36Sopenharmony_ci default: 86862306a36Sopenharmony_ci break; 86962306a36Sopenharmony_ci } 87062306a36Sopenharmony_ci dev_warn_ratelimited(&vm->vdev->dev, 87162306a36Sopenharmony_ci "memory block onlining denied\n"); 87262306a36Sopenharmony_ci return NOTIFY_BAD; 87362306a36Sopenharmony_ci} 87462306a36Sopenharmony_ci 87562306a36Sopenharmony_cistatic void virtio_mem_sbm_notify_offline(struct virtio_mem *vm, 87662306a36Sopenharmony_ci unsigned long mb_id) 87762306a36Sopenharmony_ci{ 87862306a36Sopenharmony_ci switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) { 87962306a36Sopenharmony_ci case VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL: 88062306a36Sopenharmony_ci case VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL: 88162306a36Sopenharmony_ci virtio_mem_sbm_set_mb_state(vm, mb_id, 88262306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL); 88362306a36Sopenharmony_ci break; 88462306a36Sopenharmony_ci case VIRTIO_MEM_SBM_MB_KERNEL: 88562306a36Sopenharmony_ci case VIRTIO_MEM_SBM_MB_MOVABLE: 88662306a36Sopenharmony_ci virtio_mem_sbm_set_mb_state(vm, mb_id, 88762306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_OFFLINE); 88862306a36Sopenharmony_ci break; 88962306a36Sopenharmony_ci default: 89062306a36Sopenharmony_ci BUG(); 89162306a36Sopenharmony_ci break; 89262306a36Sopenharmony_ci } 89362306a36Sopenharmony_ci} 89462306a36Sopenharmony_ci 89562306a36Sopenharmony_cistatic void virtio_mem_sbm_notify_online(struct virtio_mem *vm, 89662306a36Sopenharmony_ci unsigned long mb_id, 89762306a36Sopenharmony_ci unsigned long start_pfn) 89862306a36Sopenharmony_ci{ 89962306a36Sopenharmony_ci const bool is_movable = is_zone_movable_page(pfn_to_page(start_pfn)); 90062306a36Sopenharmony_ci int new_state; 90162306a36Sopenharmony_ci 90262306a36Sopenharmony_ci switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) { 90362306a36Sopenharmony_ci case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL: 90462306a36Sopenharmony_ci new_state = VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL; 90562306a36Sopenharmony_ci if (is_movable) 90662306a36Sopenharmony_ci new_state = VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL; 90762306a36Sopenharmony_ci break; 90862306a36Sopenharmony_ci case VIRTIO_MEM_SBM_MB_OFFLINE: 90962306a36Sopenharmony_ci new_state = VIRTIO_MEM_SBM_MB_KERNEL; 91062306a36Sopenharmony_ci if (is_movable) 91162306a36Sopenharmony_ci new_state = VIRTIO_MEM_SBM_MB_MOVABLE; 91262306a36Sopenharmony_ci break; 91362306a36Sopenharmony_ci default: 91462306a36Sopenharmony_ci BUG(); 91562306a36Sopenharmony_ci break; 91662306a36Sopenharmony_ci } 91762306a36Sopenharmony_ci virtio_mem_sbm_set_mb_state(vm, mb_id, new_state); 91862306a36Sopenharmony_ci} 91962306a36Sopenharmony_ci 92062306a36Sopenharmony_cistatic void virtio_mem_sbm_notify_going_offline(struct virtio_mem *vm, 92162306a36Sopenharmony_ci unsigned long mb_id) 92262306a36Sopenharmony_ci{ 92362306a36Sopenharmony_ci const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size); 92462306a36Sopenharmony_ci unsigned long pfn; 92562306a36Sopenharmony_ci int sb_id; 92662306a36Sopenharmony_ci 92762306a36Sopenharmony_ci for (sb_id = 0; sb_id < vm->sbm.sbs_per_mb; sb_id++) { 92862306a36Sopenharmony_ci if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) 92962306a36Sopenharmony_ci continue; 93062306a36Sopenharmony_ci pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + 93162306a36Sopenharmony_ci sb_id * vm->sbm.sb_size); 93262306a36Sopenharmony_ci virtio_mem_fake_offline_going_offline(pfn, nr_pages); 93362306a36Sopenharmony_ci } 93462306a36Sopenharmony_ci} 93562306a36Sopenharmony_ci 93662306a36Sopenharmony_cistatic void virtio_mem_sbm_notify_cancel_offline(struct virtio_mem *vm, 93762306a36Sopenharmony_ci unsigned long mb_id) 93862306a36Sopenharmony_ci{ 93962306a36Sopenharmony_ci const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size); 94062306a36Sopenharmony_ci unsigned long pfn; 94162306a36Sopenharmony_ci int sb_id; 94262306a36Sopenharmony_ci 94362306a36Sopenharmony_ci for (sb_id = 0; sb_id < vm->sbm.sbs_per_mb; sb_id++) { 94462306a36Sopenharmony_ci if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) 94562306a36Sopenharmony_ci continue; 94662306a36Sopenharmony_ci pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + 94762306a36Sopenharmony_ci sb_id * vm->sbm.sb_size); 94862306a36Sopenharmony_ci virtio_mem_fake_offline_cancel_offline(pfn, nr_pages); 94962306a36Sopenharmony_ci } 95062306a36Sopenharmony_ci} 95162306a36Sopenharmony_ci 95262306a36Sopenharmony_cistatic void virtio_mem_bbm_notify_going_offline(struct virtio_mem *vm, 95362306a36Sopenharmony_ci unsigned long bb_id, 95462306a36Sopenharmony_ci unsigned long pfn, 95562306a36Sopenharmony_ci unsigned long nr_pages) 95662306a36Sopenharmony_ci{ 95762306a36Sopenharmony_ci /* 95862306a36Sopenharmony_ci * When marked as "fake-offline", all online memory of this device block 95962306a36Sopenharmony_ci * is allocated by us. Otherwise, we don't have any memory allocated. 96062306a36Sopenharmony_ci */ 96162306a36Sopenharmony_ci if (virtio_mem_bbm_get_bb_state(vm, bb_id) != 96262306a36Sopenharmony_ci VIRTIO_MEM_BBM_BB_FAKE_OFFLINE) 96362306a36Sopenharmony_ci return; 96462306a36Sopenharmony_ci virtio_mem_fake_offline_going_offline(pfn, nr_pages); 96562306a36Sopenharmony_ci} 96662306a36Sopenharmony_ci 96762306a36Sopenharmony_cistatic void virtio_mem_bbm_notify_cancel_offline(struct virtio_mem *vm, 96862306a36Sopenharmony_ci unsigned long bb_id, 96962306a36Sopenharmony_ci unsigned long pfn, 97062306a36Sopenharmony_ci unsigned long nr_pages) 97162306a36Sopenharmony_ci{ 97262306a36Sopenharmony_ci if (virtio_mem_bbm_get_bb_state(vm, bb_id) != 97362306a36Sopenharmony_ci VIRTIO_MEM_BBM_BB_FAKE_OFFLINE) 97462306a36Sopenharmony_ci return; 97562306a36Sopenharmony_ci virtio_mem_fake_offline_cancel_offline(pfn, nr_pages); 97662306a36Sopenharmony_ci} 97762306a36Sopenharmony_ci 97862306a36Sopenharmony_ci/* 97962306a36Sopenharmony_ci * This callback will either be called synchronously from add_memory() or 98062306a36Sopenharmony_ci * asynchronously (e.g., triggered via user space). We have to be careful 98162306a36Sopenharmony_ci * with locking when calling add_memory(). 98262306a36Sopenharmony_ci */ 98362306a36Sopenharmony_cistatic int virtio_mem_memory_notifier_cb(struct notifier_block *nb, 98462306a36Sopenharmony_ci unsigned long action, void *arg) 98562306a36Sopenharmony_ci{ 98662306a36Sopenharmony_ci struct virtio_mem *vm = container_of(nb, struct virtio_mem, 98762306a36Sopenharmony_ci memory_notifier); 98862306a36Sopenharmony_ci struct memory_notify *mhp = arg; 98962306a36Sopenharmony_ci const unsigned long start = PFN_PHYS(mhp->start_pfn); 99062306a36Sopenharmony_ci const unsigned long size = PFN_PHYS(mhp->nr_pages); 99162306a36Sopenharmony_ci int rc = NOTIFY_OK; 99262306a36Sopenharmony_ci unsigned long id; 99362306a36Sopenharmony_ci 99462306a36Sopenharmony_ci if (!virtio_mem_overlaps_range(vm, start, size)) 99562306a36Sopenharmony_ci return NOTIFY_DONE; 99662306a36Sopenharmony_ci 99762306a36Sopenharmony_ci if (vm->in_sbm) { 99862306a36Sopenharmony_ci id = virtio_mem_phys_to_mb_id(start); 99962306a36Sopenharmony_ci /* 100062306a36Sopenharmony_ci * In SBM, we add memory in separate memory blocks - we expect 100162306a36Sopenharmony_ci * it to be onlined/offlined in the same granularity. Bail out 100262306a36Sopenharmony_ci * if this ever changes. 100362306a36Sopenharmony_ci */ 100462306a36Sopenharmony_ci if (WARN_ON_ONCE(size != memory_block_size_bytes() || 100562306a36Sopenharmony_ci !IS_ALIGNED(start, memory_block_size_bytes()))) 100662306a36Sopenharmony_ci return NOTIFY_BAD; 100762306a36Sopenharmony_ci } else { 100862306a36Sopenharmony_ci id = virtio_mem_phys_to_bb_id(vm, start); 100962306a36Sopenharmony_ci /* 101062306a36Sopenharmony_ci * In BBM, we only care about onlining/offlining happening 101162306a36Sopenharmony_ci * within a single big block, we don't care about the 101262306a36Sopenharmony_ci * actual granularity as we don't track individual Linux 101362306a36Sopenharmony_ci * memory blocks. 101462306a36Sopenharmony_ci */ 101562306a36Sopenharmony_ci if (WARN_ON_ONCE(id != virtio_mem_phys_to_bb_id(vm, start + size - 1))) 101662306a36Sopenharmony_ci return NOTIFY_BAD; 101762306a36Sopenharmony_ci } 101862306a36Sopenharmony_ci 101962306a36Sopenharmony_ci /* 102062306a36Sopenharmony_ci * Avoid circular locking lockdep warnings. We lock the mutex 102162306a36Sopenharmony_ci * e.g., in MEM_GOING_ONLINE and unlock it in MEM_ONLINE. The 102262306a36Sopenharmony_ci * blocking_notifier_call_chain() has it's own lock, which gets unlocked 102362306a36Sopenharmony_ci * between both notifier calls and will bail out. False positive. 102462306a36Sopenharmony_ci */ 102562306a36Sopenharmony_ci lockdep_off(); 102662306a36Sopenharmony_ci 102762306a36Sopenharmony_ci switch (action) { 102862306a36Sopenharmony_ci case MEM_GOING_OFFLINE: 102962306a36Sopenharmony_ci mutex_lock(&vm->hotplug_mutex); 103062306a36Sopenharmony_ci if (vm->removing) { 103162306a36Sopenharmony_ci rc = notifier_from_errno(-EBUSY); 103262306a36Sopenharmony_ci mutex_unlock(&vm->hotplug_mutex); 103362306a36Sopenharmony_ci break; 103462306a36Sopenharmony_ci } 103562306a36Sopenharmony_ci vm->hotplug_active = true; 103662306a36Sopenharmony_ci if (vm->in_sbm) 103762306a36Sopenharmony_ci virtio_mem_sbm_notify_going_offline(vm, id); 103862306a36Sopenharmony_ci else 103962306a36Sopenharmony_ci virtio_mem_bbm_notify_going_offline(vm, id, 104062306a36Sopenharmony_ci mhp->start_pfn, 104162306a36Sopenharmony_ci mhp->nr_pages); 104262306a36Sopenharmony_ci break; 104362306a36Sopenharmony_ci case MEM_GOING_ONLINE: 104462306a36Sopenharmony_ci mutex_lock(&vm->hotplug_mutex); 104562306a36Sopenharmony_ci if (vm->removing) { 104662306a36Sopenharmony_ci rc = notifier_from_errno(-EBUSY); 104762306a36Sopenharmony_ci mutex_unlock(&vm->hotplug_mutex); 104862306a36Sopenharmony_ci break; 104962306a36Sopenharmony_ci } 105062306a36Sopenharmony_ci vm->hotplug_active = true; 105162306a36Sopenharmony_ci if (vm->in_sbm) 105262306a36Sopenharmony_ci rc = virtio_mem_sbm_notify_going_online(vm, id); 105362306a36Sopenharmony_ci break; 105462306a36Sopenharmony_ci case MEM_OFFLINE: 105562306a36Sopenharmony_ci if (vm->in_sbm) 105662306a36Sopenharmony_ci virtio_mem_sbm_notify_offline(vm, id); 105762306a36Sopenharmony_ci 105862306a36Sopenharmony_ci atomic64_add(size, &vm->offline_size); 105962306a36Sopenharmony_ci /* 106062306a36Sopenharmony_ci * Trigger the workqueue. Now that we have some offline memory, 106162306a36Sopenharmony_ci * maybe we can handle pending unplug requests. 106262306a36Sopenharmony_ci */ 106362306a36Sopenharmony_ci if (!unplug_online) 106462306a36Sopenharmony_ci virtio_mem_retry(vm); 106562306a36Sopenharmony_ci 106662306a36Sopenharmony_ci vm->hotplug_active = false; 106762306a36Sopenharmony_ci mutex_unlock(&vm->hotplug_mutex); 106862306a36Sopenharmony_ci break; 106962306a36Sopenharmony_ci case MEM_ONLINE: 107062306a36Sopenharmony_ci if (vm->in_sbm) 107162306a36Sopenharmony_ci virtio_mem_sbm_notify_online(vm, id, mhp->start_pfn); 107262306a36Sopenharmony_ci 107362306a36Sopenharmony_ci atomic64_sub(size, &vm->offline_size); 107462306a36Sopenharmony_ci /* 107562306a36Sopenharmony_ci * Start adding more memory once we onlined half of our 107662306a36Sopenharmony_ci * threshold. Don't trigger if it's possibly due to our actipn 107762306a36Sopenharmony_ci * (e.g., us adding memory which gets onlined immediately from 107862306a36Sopenharmony_ci * the core). 107962306a36Sopenharmony_ci */ 108062306a36Sopenharmony_ci if (!atomic_read(&vm->wq_active) && 108162306a36Sopenharmony_ci virtio_mem_could_add_memory(vm, vm->offline_threshold / 2)) 108262306a36Sopenharmony_ci virtio_mem_retry(vm); 108362306a36Sopenharmony_ci 108462306a36Sopenharmony_ci vm->hotplug_active = false; 108562306a36Sopenharmony_ci mutex_unlock(&vm->hotplug_mutex); 108662306a36Sopenharmony_ci break; 108762306a36Sopenharmony_ci case MEM_CANCEL_OFFLINE: 108862306a36Sopenharmony_ci if (!vm->hotplug_active) 108962306a36Sopenharmony_ci break; 109062306a36Sopenharmony_ci if (vm->in_sbm) 109162306a36Sopenharmony_ci virtio_mem_sbm_notify_cancel_offline(vm, id); 109262306a36Sopenharmony_ci else 109362306a36Sopenharmony_ci virtio_mem_bbm_notify_cancel_offline(vm, id, 109462306a36Sopenharmony_ci mhp->start_pfn, 109562306a36Sopenharmony_ci mhp->nr_pages); 109662306a36Sopenharmony_ci vm->hotplug_active = false; 109762306a36Sopenharmony_ci mutex_unlock(&vm->hotplug_mutex); 109862306a36Sopenharmony_ci break; 109962306a36Sopenharmony_ci case MEM_CANCEL_ONLINE: 110062306a36Sopenharmony_ci if (!vm->hotplug_active) 110162306a36Sopenharmony_ci break; 110262306a36Sopenharmony_ci vm->hotplug_active = false; 110362306a36Sopenharmony_ci mutex_unlock(&vm->hotplug_mutex); 110462306a36Sopenharmony_ci break; 110562306a36Sopenharmony_ci default: 110662306a36Sopenharmony_ci break; 110762306a36Sopenharmony_ci } 110862306a36Sopenharmony_ci 110962306a36Sopenharmony_ci lockdep_on(); 111062306a36Sopenharmony_ci 111162306a36Sopenharmony_ci return rc; 111262306a36Sopenharmony_ci} 111362306a36Sopenharmony_ci 111462306a36Sopenharmony_ci/* 111562306a36Sopenharmony_ci * Set a range of pages PG_offline. Remember pages that were never onlined 111662306a36Sopenharmony_ci * (via generic_online_page()) using PageDirty(). 111762306a36Sopenharmony_ci */ 111862306a36Sopenharmony_cistatic void virtio_mem_set_fake_offline(unsigned long pfn, 111962306a36Sopenharmony_ci unsigned long nr_pages, bool onlined) 112062306a36Sopenharmony_ci{ 112162306a36Sopenharmony_ci page_offline_begin(); 112262306a36Sopenharmony_ci for (; nr_pages--; pfn++) { 112362306a36Sopenharmony_ci struct page *page = pfn_to_page(pfn); 112462306a36Sopenharmony_ci 112562306a36Sopenharmony_ci __SetPageOffline(page); 112662306a36Sopenharmony_ci if (!onlined) { 112762306a36Sopenharmony_ci SetPageDirty(page); 112862306a36Sopenharmony_ci /* FIXME: remove after cleanups */ 112962306a36Sopenharmony_ci ClearPageReserved(page); 113062306a36Sopenharmony_ci } 113162306a36Sopenharmony_ci } 113262306a36Sopenharmony_ci page_offline_end(); 113362306a36Sopenharmony_ci} 113462306a36Sopenharmony_ci 113562306a36Sopenharmony_ci/* 113662306a36Sopenharmony_ci * Clear PG_offline from a range of pages. If the pages were never onlined, 113762306a36Sopenharmony_ci * (via generic_online_page()), clear PageDirty(). 113862306a36Sopenharmony_ci */ 113962306a36Sopenharmony_cistatic void virtio_mem_clear_fake_offline(unsigned long pfn, 114062306a36Sopenharmony_ci unsigned long nr_pages, bool onlined) 114162306a36Sopenharmony_ci{ 114262306a36Sopenharmony_ci for (; nr_pages--; pfn++) { 114362306a36Sopenharmony_ci struct page *page = pfn_to_page(pfn); 114462306a36Sopenharmony_ci 114562306a36Sopenharmony_ci __ClearPageOffline(page); 114662306a36Sopenharmony_ci if (!onlined) 114762306a36Sopenharmony_ci ClearPageDirty(page); 114862306a36Sopenharmony_ci } 114962306a36Sopenharmony_ci} 115062306a36Sopenharmony_ci 115162306a36Sopenharmony_ci/* 115262306a36Sopenharmony_ci * Release a range of fake-offline pages to the buddy, effectively 115362306a36Sopenharmony_ci * fake-onlining them. 115462306a36Sopenharmony_ci */ 115562306a36Sopenharmony_cistatic void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages) 115662306a36Sopenharmony_ci{ 115762306a36Sopenharmony_ci unsigned long order = MAX_ORDER; 115862306a36Sopenharmony_ci unsigned long i; 115962306a36Sopenharmony_ci 116062306a36Sopenharmony_ci /* 116162306a36Sopenharmony_ci * We might get called for ranges that don't cover properly aligned 116262306a36Sopenharmony_ci * MAX_ORDER pages; however, we can only online properly aligned 116362306a36Sopenharmony_ci * pages with an order of MAX_ORDER at maximum. 116462306a36Sopenharmony_ci */ 116562306a36Sopenharmony_ci while (!IS_ALIGNED(pfn | nr_pages, 1 << order)) 116662306a36Sopenharmony_ci order--; 116762306a36Sopenharmony_ci 116862306a36Sopenharmony_ci for (i = 0; i < nr_pages; i += 1 << order) { 116962306a36Sopenharmony_ci struct page *page = pfn_to_page(pfn + i); 117062306a36Sopenharmony_ci 117162306a36Sopenharmony_ci /* 117262306a36Sopenharmony_ci * If the page is PageDirty(), it was kept fake-offline when 117362306a36Sopenharmony_ci * onlining the memory block. Otherwise, it was allocated 117462306a36Sopenharmony_ci * using alloc_contig_range(). All pages in a subblock are 117562306a36Sopenharmony_ci * alike. 117662306a36Sopenharmony_ci */ 117762306a36Sopenharmony_ci if (PageDirty(page)) { 117862306a36Sopenharmony_ci virtio_mem_clear_fake_offline(pfn + i, 1 << order, false); 117962306a36Sopenharmony_ci generic_online_page(page, order); 118062306a36Sopenharmony_ci } else { 118162306a36Sopenharmony_ci virtio_mem_clear_fake_offline(pfn + i, 1 << order, true); 118262306a36Sopenharmony_ci free_contig_range(pfn + i, 1 << order); 118362306a36Sopenharmony_ci adjust_managed_page_count(page, 1 << order); 118462306a36Sopenharmony_ci } 118562306a36Sopenharmony_ci } 118662306a36Sopenharmony_ci} 118762306a36Sopenharmony_ci 118862306a36Sopenharmony_ci/* 118962306a36Sopenharmony_ci * Try to allocate a range, marking pages fake-offline, effectively 119062306a36Sopenharmony_ci * fake-offlining them. 119162306a36Sopenharmony_ci */ 119262306a36Sopenharmony_cistatic int virtio_mem_fake_offline(struct virtio_mem *vm, unsigned long pfn, 119362306a36Sopenharmony_ci unsigned long nr_pages) 119462306a36Sopenharmony_ci{ 119562306a36Sopenharmony_ci const bool is_movable = is_zone_movable_page(pfn_to_page(pfn)); 119662306a36Sopenharmony_ci int rc, retry_count; 119762306a36Sopenharmony_ci 119862306a36Sopenharmony_ci /* 119962306a36Sopenharmony_ci * TODO: We want an alloc_contig_range() mode that tries to allocate 120062306a36Sopenharmony_ci * harder (e.g., dealing with temporarily pinned pages, PCP), especially 120162306a36Sopenharmony_ci * with ZONE_MOVABLE. So for now, retry a couple of times with 120262306a36Sopenharmony_ci * ZONE_MOVABLE before giving up - because that zone is supposed to give 120362306a36Sopenharmony_ci * some guarantees. 120462306a36Sopenharmony_ci */ 120562306a36Sopenharmony_ci for (retry_count = 0; retry_count < 5; retry_count++) { 120662306a36Sopenharmony_ci /* 120762306a36Sopenharmony_ci * If the config changed, stop immediately and go back to the 120862306a36Sopenharmony_ci * main loop: avoid trying to keep unplugging if the device 120962306a36Sopenharmony_ci * might have decided to not remove any more memory. 121062306a36Sopenharmony_ci */ 121162306a36Sopenharmony_ci if (atomic_read(&vm->config_changed)) 121262306a36Sopenharmony_ci return -EAGAIN; 121362306a36Sopenharmony_ci 121462306a36Sopenharmony_ci rc = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE, 121562306a36Sopenharmony_ci GFP_KERNEL); 121662306a36Sopenharmony_ci if (rc == -ENOMEM) 121762306a36Sopenharmony_ci /* whoops, out of memory */ 121862306a36Sopenharmony_ci return rc; 121962306a36Sopenharmony_ci else if (rc && !is_movable) 122062306a36Sopenharmony_ci break; 122162306a36Sopenharmony_ci else if (rc) 122262306a36Sopenharmony_ci continue; 122362306a36Sopenharmony_ci 122462306a36Sopenharmony_ci virtio_mem_set_fake_offline(pfn, nr_pages, true); 122562306a36Sopenharmony_ci adjust_managed_page_count(pfn_to_page(pfn), -nr_pages); 122662306a36Sopenharmony_ci return 0; 122762306a36Sopenharmony_ci } 122862306a36Sopenharmony_ci 122962306a36Sopenharmony_ci return -EBUSY; 123062306a36Sopenharmony_ci} 123162306a36Sopenharmony_ci 123262306a36Sopenharmony_ci/* 123362306a36Sopenharmony_ci * Handle fake-offline pages when memory is going offline - such that the 123462306a36Sopenharmony_ci * pages can be skipped by mm-core when offlining. 123562306a36Sopenharmony_ci */ 123662306a36Sopenharmony_cistatic void virtio_mem_fake_offline_going_offline(unsigned long pfn, 123762306a36Sopenharmony_ci unsigned long nr_pages) 123862306a36Sopenharmony_ci{ 123962306a36Sopenharmony_ci struct page *page; 124062306a36Sopenharmony_ci unsigned long i; 124162306a36Sopenharmony_ci 124262306a36Sopenharmony_ci /* 124362306a36Sopenharmony_ci * Drop our reference to the pages so the memory can get offlined 124462306a36Sopenharmony_ci * and add the unplugged pages to the managed page counters (so 124562306a36Sopenharmony_ci * offlining code can correctly subtract them again). 124662306a36Sopenharmony_ci */ 124762306a36Sopenharmony_ci adjust_managed_page_count(pfn_to_page(pfn), nr_pages); 124862306a36Sopenharmony_ci /* Drop our reference to the pages so the memory can get offlined. */ 124962306a36Sopenharmony_ci for (i = 0; i < nr_pages; i++) { 125062306a36Sopenharmony_ci page = pfn_to_page(pfn + i); 125162306a36Sopenharmony_ci if (WARN_ON(!page_ref_dec_and_test(page))) 125262306a36Sopenharmony_ci dump_page(page, "fake-offline page referenced"); 125362306a36Sopenharmony_ci } 125462306a36Sopenharmony_ci} 125562306a36Sopenharmony_ci 125662306a36Sopenharmony_ci/* 125762306a36Sopenharmony_ci * Handle fake-offline pages when memory offlining is canceled - to undo 125862306a36Sopenharmony_ci * what we did in virtio_mem_fake_offline_going_offline(). 125962306a36Sopenharmony_ci */ 126062306a36Sopenharmony_cistatic void virtio_mem_fake_offline_cancel_offline(unsigned long pfn, 126162306a36Sopenharmony_ci unsigned long nr_pages) 126262306a36Sopenharmony_ci{ 126362306a36Sopenharmony_ci unsigned long i; 126462306a36Sopenharmony_ci 126562306a36Sopenharmony_ci /* 126662306a36Sopenharmony_ci * Get the reference we dropped when going offline and subtract the 126762306a36Sopenharmony_ci * unplugged pages from the managed page counters. 126862306a36Sopenharmony_ci */ 126962306a36Sopenharmony_ci adjust_managed_page_count(pfn_to_page(pfn), -nr_pages); 127062306a36Sopenharmony_ci for (i = 0; i < nr_pages; i++) 127162306a36Sopenharmony_ci page_ref_inc(pfn_to_page(pfn + i)); 127262306a36Sopenharmony_ci} 127362306a36Sopenharmony_ci 127462306a36Sopenharmony_cistatic void virtio_mem_online_page(struct virtio_mem *vm, 127562306a36Sopenharmony_ci struct page *page, unsigned int order) 127662306a36Sopenharmony_ci{ 127762306a36Sopenharmony_ci const unsigned long start = page_to_phys(page); 127862306a36Sopenharmony_ci const unsigned long end = start + PFN_PHYS(1 << order); 127962306a36Sopenharmony_ci unsigned long addr, next, id, sb_id, count; 128062306a36Sopenharmony_ci bool do_online; 128162306a36Sopenharmony_ci 128262306a36Sopenharmony_ci /* 128362306a36Sopenharmony_ci * We can get called with any order up to MAX_ORDER. If our subblock 128462306a36Sopenharmony_ci * size is smaller than that and we have a mixture of plugged and 128562306a36Sopenharmony_ci * unplugged subblocks within such a page, we have to process in 128662306a36Sopenharmony_ci * smaller granularity. In that case we'll adjust the order exactly once 128762306a36Sopenharmony_ci * within the loop. 128862306a36Sopenharmony_ci */ 128962306a36Sopenharmony_ci for (addr = start; addr < end; ) { 129062306a36Sopenharmony_ci next = addr + PFN_PHYS(1 << order); 129162306a36Sopenharmony_ci 129262306a36Sopenharmony_ci if (vm->in_sbm) { 129362306a36Sopenharmony_ci id = virtio_mem_phys_to_mb_id(addr); 129462306a36Sopenharmony_ci sb_id = virtio_mem_phys_to_sb_id(vm, addr); 129562306a36Sopenharmony_ci count = virtio_mem_phys_to_sb_id(vm, next - 1) - sb_id + 1; 129662306a36Sopenharmony_ci 129762306a36Sopenharmony_ci if (virtio_mem_sbm_test_sb_plugged(vm, id, sb_id, count)) { 129862306a36Sopenharmony_ci /* Fully plugged. */ 129962306a36Sopenharmony_ci do_online = true; 130062306a36Sopenharmony_ci } else if (count == 1 || 130162306a36Sopenharmony_ci virtio_mem_sbm_test_sb_unplugged(vm, id, sb_id, count)) { 130262306a36Sopenharmony_ci /* Fully unplugged. */ 130362306a36Sopenharmony_ci do_online = false; 130462306a36Sopenharmony_ci } else { 130562306a36Sopenharmony_ci /* 130662306a36Sopenharmony_ci * Mixture, process sub-blocks instead. This 130762306a36Sopenharmony_ci * will be at least the size of a pageblock. 130862306a36Sopenharmony_ci * We'll run into this case exactly once. 130962306a36Sopenharmony_ci */ 131062306a36Sopenharmony_ci order = ilog2(vm->sbm.sb_size) - PAGE_SHIFT; 131162306a36Sopenharmony_ci do_online = virtio_mem_sbm_test_sb_plugged(vm, id, sb_id, 1); 131262306a36Sopenharmony_ci continue; 131362306a36Sopenharmony_ci } 131462306a36Sopenharmony_ci } else { 131562306a36Sopenharmony_ci /* 131662306a36Sopenharmony_ci * If the whole block is marked fake offline, keep 131762306a36Sopenharmony_ci * everything that way. 131862306a36Sopenharmony_ci */ 131962306a36Sopenharmony_ci id = virtio_mem_phys_to_bb_id(vm, addr); 132062306a36Sopenharmony_ci do_online = virtio_mem_bbm_get_bb_state(vm, id) != 132162306a36Sopenharmony_ci VIRTIO_MEM_BBM_BB_FAKE_OFFLINE; 132262306a36Sopenharmony_ci } 132362306a36Sopenharmony_ci 132462306a36Sopenharmony_ci if (do_online) 132562306a36Sopenharmony_ci generic_online_page(pfn_to_page(PFN_DOWN(addr)), order); 132662306a36Sopenharmony_ci else 132762306a36Sopenharmony_ci virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order, 132862306a36Sopenharmony_ci false); 132962306a36Sopenharmony_ci addr = next; 133062306a36Sopenharmony_ci } 133162306a36Sopenharmony_ci} 133262306a36Sopenharmony_ci 133362306a36Sopenharmony_cistatic void virtio_mem_online_page_cb(struct page *page, unsigned int order) 133462306a36Sopenharmony_ci{ 133562306a36Sopenharmony_ci const unsigned long addr = page_to_phys(page); 133662306a36Sopenharmony_ci struct virtio_mem *vm; 133762306a36Sopenharmony_ci 133862306a36Sopenharmony_ci rcu_read_lock(); 133962306a36Sopenharmony_ci list_for_each_entry_rcu(vm, &virtio_mem_devices, next) { 134062306a36Sopenharmony_ci /* 134162306a36Sopenharmony_ci * Pages we're onlining will never cross memory blocks and, 134262306a36Sopenharmony_ci * therefore, not virtio-mem devices. 134362306a36Sopenharmony_ci */ 134462306a36Sopenharmony_ci if (!virtio_mem_contains_range(vm, addr, PFN_PHYS(1 << order))) 134562306a36Sopenharmony_ci continue; 134662306a36Sopenharmony_ci 134762306a36Sopenharmony_ci /* 134862306a36Sopenharmony_ci * virtio_mem_set_fake_offline() might sleep. We can safely 134962306a36Sopenharmony_ci * drop the RCU lock at this point because the device 135062306a36Sopenharmony_ci * cannot go away. See virtio_mem_remove() how races 135162306a36Sopenharmony_ci * between memory onlining and device removal are handled. 135262306a36Sopenharmony_ci */ 135362306a36Sopenharmony_ci rcu_read_unlock(); 135462306a36Sopenharmony_ci 135562306a36Sopenharmony_ci virtio_mem_online_page(vm, page, order); 135662306a36Sopenharmony_ci return; 135762306a36Sopenharmony_ci } 135862306a36Sopenharmony_ci rcu_read_unlock(); 135962306a36Sopenharmony_ci 136062306a36Sopenharmony_ci /* not virtio-mem memory, but e.g., a DIMM. online it */ 136162306a36Sopenharmony_ci generic_online_page(page, order); 136262306a36Sopenharmony_ci} 136362306a36Sopenharmony_ci 136462306a36Sopenharmony_cistatic uint64_t virtio_mem_send_request(struct virtio_mem *vm, 136562306a36Sopenharmony_ci const struct virtio_mem_req *req) 136662306a36Sopenharmony_ci{ 136762306a36Sopenharmony_ci struct scatterlist *sgs[2], sg_req, sg_resp; 136862306a36Sopenharmony_ci unsigned int len; 136962306a36Sopenharmony_ci int rc; 137062306a36Sopenharmony_ci 137162306a36Sopenharmony_ci /* don't use the request residing on the stack (vaddr) */ 137262306a36Sopenharmony_ci vm->req = *req; 137362306a36Sopenharmony_ci 137462306a36Sopenharmony_ci /* out: buffer for request */ 137562306a36Sopenharmony_ci sg_init_one(&sg_req, &vm->req, sizeof(vm->req)); 137662306a36Sopenharmony_ci sgs[0] = &sg_req; 137762306a36Sopenharmony_ci 137862306a36Sopenharmony_ci /* in: buffer for response */ 137962306a36Sopenharmony_ci sg_init_one(&sg_resp, &vm->resp, sizeof(vm->resp)); 138062306a36Sopenharmony_ci sgs[1] = &sg_resp; 138162306a36Sopenharmony_ci 138262306a36Sopenharmony_ci rc = virtqueue_add_sgs(vm->vq, sgs, 1, 1, vm, GFP_KERNEL); 138362306a36Sopenharmony_ci if (rc < 0) 138462306a36Sopenharmony_ci return rc; 138562306a36Sopenharmony_ci 138662306a36Sopenharmony_ci virtqueue_kick(vm->vq); 138762306a36Sopenharmony_ci 138862306a36Sopenharmony_ci /* wait for a response */ 138962306a36Sopenharmony_ci wait_event(vm->host_resp, virtqueue_get_buf(vm->vq, &len)); 139062306a36Sopenharmony_ci 139162306a36Sopenharmony_ci return virtio16_to_cpu(vm->vdev, vm->resp.type); 139262306a36Sopenharmony_ci} 139362306a36Sopenharmony_ci 139462306a36Sopenharmony_cistatic int virtio_mem_send_plug_request(struct virtio_mem *vm, uint64_t addr, 139562306a36Sopenharmony_ci uint64_t size) 139662306a36Sopenharmony_ci{ 139762306a36Sopenharmony_ci const uint64_t nb_vm_blocks = size / vm->device_block_size; 139862306a36Sopenharmony_ci const struct virtio_mem_req req = { 139962306a36Sopenharmony_ci .type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_PLUG), 140062306a36Sopenharmony_ci .u.plug.addr = cpu_to_virtio64(vm->vdev, addr), 140162306a36Sopenharmony_ci .u.plug.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks), 140262306a36Sopenharmony_ci }; 140362306a36Sopenharmony_ci int rc = -ENOMEM; 140462306a36Sopenharmony_ci 140562306a36Sopenharmony_ci if (atomic_read(&vm->config_changed)) 140662306a36Sopenharmony_ci return -EAGAIN; 140762306a36Sopenharmony_ci 140862306a36Sopenharmony_ci dev_dbg(&vm->vdev->dev, "plugging memory: 0x%llx - 0x%llx\n", addr, 140962306a36Sopenharmony_ci addr + size - 1); 141062306a36Sopenharmony_ci 141162306a36Sopenharmony_ci switch (virtio_mem_send_request(vm, &req)) { 141262306a36Sopenharmony_ci case VIRTIO_MEM_RESP_ACK: 141362306a36Sopenharmony_ci vm->plugged_size += size; 141462306a36Sopenharmony_ci return 0; 141562306a36Sopenharmony_ci case VIRTIO_MEM_RESP_NACK: 141662306a36Sopenharmony_ci rc = -EAGAIN; 141762306a36Sopenharmony_ci break; 141862306a36Sopenharmony_ci case VIRTIO_MEM_RESP_BUSY: 141962306a36Sopenharmony_ci rc = -ETXTBSY; 142062306a36Sopenharmony_ci break; 142162306a36Sopenharmony_ci case VIRTIO_MEM_RESP_ERROR: 142262306a36Sopenharmony_ci rc = -EINVAL; 142362306a36Sopenharmony_ci break; 142462306a36Sopenharmony_ci default: 142562306a36Sopenharmony_ci break; 142662306a36Sopenharmony_ci } 142762306a36Sopenharmony_ci 142862306a36Sopenharmony_ci dev_dbg(&vm->vdev->dev, "plugging memory failed: %d\n", rc); 142962306a36Sopenharmony_ci return rc; 143062306a36Sopenharmony_ci} 143162306a36Sopenharmony_ci 143262306a36Sopenharmony_cistatic int virtio_mem_send_unplug_request(struct virtio_mem *vm, uint64_t addr, 143362306a36Sopenharmony_ci uint64_t size) 143462306a36Sopenharmony_ci{ 143562306a36Sopenharmony_ci const uint64_t nb_vm_blocks = size / vm->device_block_size; 143662306a36Sopenharmony_ci const struct virtio_mem_req req = { 143762306a36Sopenharmony_ci .type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_UNPLUG), 143862306a36Sopenharmony_ci .u.unplug.addr = cpu_to_virtio64(vm->vdev, addr), 143962306a36Sopenharmony_ci .u.unplug.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks), 144062306a36Sopenharmony_ci }; 144162306a36Sopenharmony_ci int rc = -ENOMEM; 144262306a36Sopenharmony_ci 144362306a36Sopenharmony_ci if (atomic_read(&vm->config_changed)) 144462306a36Sopenharmony_ci return -EAGAIN; 144562306a36Sopenharmony_ci 144662306a36Sopenharmony_ci dev_dbg(&vm->vdev->dev, "unplugging memory: 0x%llx - 0x%llx\n", addr, 144762306a36Sopenharmony_ci addr + size - 1); 144862306a36Sopenharmony_ci 144962306a36Sopenharmony_ci switch (virtio_mem_send_request(vm, &req)) { 145062306a36Sopenharmony_ci case VIRTIO_MEM_RESP_ACK: 145162306a36Sopenharmony_ci vm->plugged_size -= size; 145262306a36Sopenharmony_ci return 0; 145362306a36Sopenharmony_ci case VIRTIO_MEM_RESP_BUSY: 145462306a36Sopenharmony_ci rc = -ETXTBSY; 145562306a36Sopenharmony_ci break; 145662306a36Sopenharmony_ci case VIRTIO_MEM_RESP_ERROR: 145762306a36Sopenharmony_ci rc = -EINVAL; 145862306a36Sopenharmony_ci break; 145962306a36Sopenharmony_ci default: 146062306a36Sopenharmony_ci break; 146162306a36Sopenharmony_ci } 146262306a36Sopenharmony_ci 146362306a36Sopenharmony_ci dev_dbg(&vm->vdev->dev, "unplugging memory failed: %d\n", rc); 146462306a36Sopenharmony_ci return rc; 146562306a36Sopenharmony_ci} 146662306a36Sopenharmony_ci 146762306a36Sopenharmony_cistatic int virtio_mem_send_unplug_all_request(struct virtio_mem *vm) 146862306a36Sopenharmony_ci{ 146962306a36Sopenharmony_ci const struct virtio_mem_req req = { 147062306a36Sopenharmony_ci .type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_UNPLUG_ALL), 147162306a36Sopenharmony_ci }; 147262306a36Sopenharmony_ci int rc = -ENOMEM; 147362306a36Sopenharmony_ci 147462306a36Sopenharmony_ci dev_dbg(&vm->vdev->dev, "unplugging all memory"); 147562306a36Sopenharmony_ci 147662306a36Sopenharmony_ci switch (virtio_mem_send_request(vm, &req)) { 147762306a36Sopenharmony_ci case VIRTIO_MEM_RESP_ACK: 147862306a36Sopenharmony_ci vm->unplug_all_required = false; 147962306a36Sopenharmony_ci vm->plugged_size = 0; 148062306a36Sopenharmony_ci /* usable region might have shrunk */ 148162306a36Sopenharmony_ci atomic_set(&vm->config_changed, 1); 148262306a36Sopenharmony_ci return 0; 148362306a36Sopenharmony_ci case VIRTIO_MEM_RESP_BUSY: 148462306a36Sopenharmony_ci rc = -ETXTBSY; 148562306a36Sopenharmony_ci break; 148662306a36Sopenharmony_ci default: 148762306a36Sopenharmony_ci break; 148862306a36Sopenharmony_ci } 148962306a36Sopenharmony_ci 149062306a36Sopenharmony_ci dev_dbg(&vm->vdev->dev, "unplugging all memory failed: %d\n", rc); 149162306a36Sopenharmony_ci return rc; 149262306a36Sopenharmony_ci} 149362306a36Sopenharmony_ci 149462306a36Sopenharmony_ci/* 149562306a36Sopenharmony_ci * Plug selected subblocks. Updates the plugged state, but not the state 149662306a36Sopenharmony_ci * of the memory block. 149762306a36Sopenharmony_ci */ 149862306a36Sopenharmony_cistatic int virtio_mem_sbm_plug_sb(struct virtio_mem *vm, unsigned long mb_id, 149962306a36Sopenharmony_ci int sb_id, int count) 150062306a36Sopenharmony_ci{ 150162306a36Sopenharmony_ci const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) + 150262306a36Sopenharmony_ci sb_id * vm->sbm.sb_size; 150362306a36Sopenharmony_ci const uint64_t size = count * vm->sbm.sb_size; 150462306a36Sopenharmony_ci int rc; 150562306a36Sopenharmony_ci 150662306a36Sopenharmony_ci rc = virtio_mem_send_plug_request(vm, addr, size); 150762306a36Sopenharmony_ci if (!rc) 150862306a36Sopenharmony_ci virtio_mem_sbm_set_sb_plugged(vm, mb_id, sb_id, count); 150962306a36Sopenharmony_ci return rc; 151062306a36Sopenharmony_ci} 151162306a36Sopenharmony_ci 151262306a36Sopenharmony_ci/* 151362306a36Sopenharmony_ci * Unplug selected subblocks. Updates the plugged state, but not the state 151462306a36Sopenharmony_ci * of the memory block. 151562306a36Sopenharmony_ci */ 151662306a36Sopenharmony_cistatic int virtio_mem_sbm_unplug_sb(struct virtio_mem *vm, unsigned long mb_id, 151762306a36Sopenharmony_ci int sb_id, int count) 151862306a36Sopenharmony_ci{ 151962306a36Sopenharmony_ci const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) + 152062306a36Sopenharmony_ci sb_id * vm->sbm.sb_size; 152162306a36Sopenharmony_ci const uint64_t size = count * vm->sbm.sb_size; 152262306a36Sopenharmony_ci int rc; 152362306a36Sopenharmony_ci 152462306a36Sopenharmony_ci rc = virtio_mem_send_unplug_request(vm, addr, size); 152562306a36Sopenharmony_ci if (!rc) 152662306a36Sopenharmony_ci virtio_mem_sbm_set_sb_unplugged(vm, mb_id, sb_id, count); 152762306a36Sopenharmony_ci return rc; 152862306a36Sopenharmony_ci} 152962306a36Sopenharmony_ci 153062306a36Sopenharmony_ci/* 153162306a36Sopenharmony_ci * Request to unplug a big block. 153262306a36Sopenharmony_ci * 153362306a36Sopenharmony_ci * Will not modify the state of the big block. 153462306a36Sopenharmony_ci */ 153562306a36Sopenharmony_cistatic int virtio_mem_bbm_unplug_bb(struct virtio_mem *vm, unsigned long bb_id) 153662306a36Sopenharmony_ci{ 153762306a36Sopenharmony_ci const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); 153862306a36Sopenharmony_ci const uint64_t size = vm->bbm.bb_size; 153962306a36Sopenharmony_ci 154062306a36Sopenharmony_ci return virtio_mem_send_unplug_request(vm, addr, size); 154162306a36Sopenharmony_ci} 154262306a36Sopenharmony_ci 154362306a36Sopenharmony_ci/* 154462306a36Sopenharmony_ci * Request to plug a big block. 154562306a36Sopenharmony_ci * 154662306a36Sopenharmony_ci * Will not modify the state of the big block. 154762306a36Sopenharmony_ci */ 154862306a36Sopenharmony_cistatic int virtio_mem_bbm_plug_bb(struct virtio_mem *vm, unsigned long bb_id) 154962306a36Sopenharmony_ci{ 155062306a36Sopenharmony_ci const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); 155162306a36Sopenharmony_ci const uint64_t size = vm->bbm.bb_size; 155262306a36Sopenharmony_ci 155362306a36Sopenharmony_ci return virtio_mem_send_plug_request(vm, addr, size); 155462306a36Sopenharmony_ci} 155562306a36Sopenharmony_ci 155662306a36Sopenharmony_ci/* 155762306a36Sopenharmony_ci * Unplug the desired number of plugged subblocks of a offline or not-added 155862306a36Sopenharmony_ci * memory block. Will fail if any subblock cannot get unplugged (instead of 155962306a36Sopenharmony_ci * skipping it). 156062306a36Sopenharmony_ci * 156162306a36Sopenharmony_ci * Will not modify the state of the memory block. 156262306a36Sopenharmony_ci * 156362306a36Sopenharmony_ci * Note: can fail after some subblocks were unplugged. 156462306a36Sopenharmony_ci */ 156562306a36Sopenharmony_cistatic int virtio_mem_sbm_unplug_any_sb_raw(struct virtio_mem *vm, 156662306a36Sopenharmony_ci unsigned long mb_id, uint64_t *nb_sb) 156762306a36Sopenharmony_ci{ 156862306a36Sopenharmony_ci int sb_id, count; 156962306a36Sopenharmony_ci int rc; 157062306a36Sopenharmony_ci 157162306a36Sopenharmony_ci sb_id = vm->sbm.sbs_per_mb - 1; 157262306a36Sopenharmony_ci while (*nb_sb) { 157362306a36Sopenharmony_ci /* Find the next candidate subblock */ 157462306a36Sopenharmony_ci while (sb_id >= 0 && 157562306a36Sopenharmony_ci virtio_mem_sbm_test_sb_unplugged(vm, mb_id, sb_id, 1)) 157662306a36Sopenharmony_ci sb_id--; 157762306a36Sopenharmony_ci if (sb_id < 0) 157862306a36Sopenharmony_ci break; 157962306a36Sopenharmony_ci /* Try to unplug multiple subblocks at a time */ 158062306a36Sopenharmony_ci count = 1; 158162306a36Sopenharmony_ci while (count < *nb_sb && sb_id > 0 && 158262306a36Sopenharmony_ci virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id - 1, 1)) { 158362306a36Sopenharmony_ci count++; 158462306a36Sopenharmony_ci sb_id--; 158562306a36Sopenharmony_ci } 158662306a36Sopenharmony_ci 158762306a36Sopenharmony_ci rc = virtio_mem_sbm_unplug_sb(vm, mb_id, sb_id, count); 158862306a36Sopenharmony_ci if (rc) 158962306a36Sopenharmony_ci return rc; 159062306a36Sopenharmony_ci *nb_sb -= count; 159162306a36Sopenharmony_ci sb_id--; 159262306a36Sopenharmony_ci } 159362306a36Sopenharmony_ci 159462306a36Sopenharmony_ci return 0; 159562306a36Sopenharmony_ci} 159662306a36Sopenharmony_ci 159762306a36Sopenharmony_ci/* 159862306a36Sopenharmony_ci * Unplug all plugged subblocks of an offline or not-added memory block. 159962306a36Sopenharmony_ci * 160062306a36Sopenharmony_ci * Will not modify the state of the memory block. 160162306a36Sopenharmony_ci * 160262306a36Sopenharmony_ci * Note: can fail after some subblocks were unplugged. 160362306a36Sopenharmony_ci */ 160462306a36Sopenharmony_cistatic int virtio_mem_sbm_unplug_mb(struct virtio_mem *vm, unsigned long mb_id) 160562306a36Sopenharmony_ci{ 160662306a36Sopenharmony_ci uint64_t nb_sb = vm->sbm.sbs_per_mb; 160762306a36Sopenharmony_ci 160862306a36Sopenharmony_ci return virtio_mem_sbm_unplug_any_sb_raw(vm, mb_id, &nb_sb); 160962306a36Sopenharmony_ci} 161062306a36Sopenharmony_ci 161162306a36Sopenharmony_ci/* 161262306a36Sopenharmony_ci * Prepare tracking data for the next memory block. 161362306a36Sopenharmony_ci */ 161462306a36Sopenharmony_cistatic int virtio_mem_sbm_prepare_next_mb(struct virtio_mem *vm, 161562306a36Sopenharmony_ci unsigned long *mb_id) 161662306a36Sopenharmony_ci{ 161762306a36Sopenharmony_ci int rc; 161862306a36Sopenharmony_ci 161962306a36Sopenharmony_ci if (vm->sbm.next_mb_id > vm->sbm.last_usable_mb_id) 162062306a36Sopenharmony_ci return -ENOSPC; 162162306a36Sopenharmony_ci 162262306a36Sopenharmony_ci /* Resize the state array if required. */ 162362306a36Sopenharmony_ci rc = virtio_mem_sbm_mb_states_prepare_next_mb(vm); 162462306a36Sopenharmony_ci if (rc) 162562306a36Sopenharmony_ci return rc; 162662306a36Sopenharmony_ci 162762306a36Sopenharmony_ci /* Resize the subblock bitmap if required. */ 162862306a36Sopenharmony_ci rc = virtio_mem_sbm_sb_states_prepare_next_mb(vm); 162962306a36Sopenharmony_ci if (rc) 163062306a36Sopenharmony_ci return rc; 163162306a36Sopenharmony_ci 163262306a36Sopenharmony_ci vm->sbm.mb_count[VIRTIO_MEM_SBM_MB_UNUSED]++; 163362306a36Sopenharmony_ci *mb_id = vm->sbm.next_mb_id++; 163462306a36Sopenharmony_ci return 0; 163562306a36Sopenharmony_ci} 163662306a36Sopenharmony_ci 163762306a36Sopenharmony_ci/* 163862306a36Sopenharmony_ci * Try to plug the desired number of subblocks and add the memory block 163962306a36Sopenharmony_ci * to Linux. 164062306a36Sopenharmony_ci * 164162306a36Sopenharmony_ci * Will modify the state of the memory block. 164262306a36Sopenharmony_ci */ 164362306a36Sopenharmony_cistatic int virtio_mem_sbm_plug_and_add_mb(struct virtio_mem *vm, 164462306a36Sopenharmony_ci unsigned long mb_id, uint64_t *nb_sb) 164562306a36Sopenharmony_ci{ 164662306a36Sopenharmony_ci const int count = min_t(int, *nb_sb, vm->sbm.sbs_per_mb); 164762306a36Sopenharmony_ci int rc; 164862306a36Sopenharmony_ci 164962306a36Sopenharmony_ci if (WARN_ON_ONCE(!count)) 165062306a36Sopenharmony_ci return -EINVAL; 165162306a36Sopenharmony_ci 165262306a36Sopenharmony_ci /* 165362306a36Sopenharmony_ci * Plug the requested number of subblocks before adding it to linux, 165462306a36Sopenharmony_ci * so that onlining will directly online all plugged subblocks. 165562306a36Sopenharmony_ci */ 165662306a36Sopenharmony_ci rc = virtio_mem_sbm_plug_sb(vm, mb_id, 0, count); 165762306a36Sopenharmony_ci if (rc) 165862306a36Sopenharmony_ci return rc; 165962306a36Sopenharmony_ci 166062306a36Sopenharmony_ci /* 166162306a36Sopenharmony_ci * Mark the block properly offline before adding it to Linux, 166262306a36Sopenharmony_ci * so the memory notifiers will find the block in the right state. 166362306a36Sopenharmony_ci */ 166462306a36Sopenharmony_ci if (count == vm->sbm.sbs_per_mb) 166562306a36Sopenharmony_ci virtio_mem_sbm_set_mb_state(vm, mb_id, 166662306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_OFFLINE); 166762306a36Sopenharmony_ci else 166862306a36Sopenharmony_ci virtio_mem_sbm_set_mb_state(vm, mb_id, 166962306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL); 167062306a36Sopenharmony_ci 167162306a36Sopenharmony_ci /* Add the memory block to linux - if that fails, try to unplug. */ 167262306a36Sopenharmony_ci rc = virtio_mem_sbm_add_mb(vm, mb_id); 167362306a36Sopenharmony_ci if (rc) { 167462306a36Sopenharmony_ci int new_state = VIRTIO_MEM_SBM_MB_UNUSED; 167562306a36Sopenharmony_ci 167662306a36Sopenharmony_ci if (virtio_mem_sbm_unplug_sb(vm, mb_id, 0, count)) 167762306a36Sopenharmony_ci new_state = VIRTIO_MEM_SBM_MB_PLUGGED; 167862306a36Sopenharmony_ci virtio_mem_sbm_set_mb_state(vm, mb_id, new_state); 167962306a36Sopenharmony_ci return rc; 168062306a36Sopenharmony_ci } 168162306a36Sopenharmony_ci 168262306a36Sopenharmony_ci *nb_sb -= count; 168362306a36Sopenharmony_ci return 0; 168462306a36Sopenharmony_ci} 168562306a36Sopenharmony_ci 168662306a36Sopenharmony_ci/* 168762306a36Sopenharmony_ci * Try to plug the desired number of subblocks of a memory block that 168862306a36Sopenharmony_ci * is already added to Linux. 168962306a36Sopenharmony_ci * 169062306a36Sopenharmony_ci * Will modify the state of the memory block. 169162306a36Sopenharmony_ci * 169262306a36Sopenharmony_ci * Note: Can fail after some subblocks were successfully plugged. 169362306a36Sopenharmony_ci */ 169462306a36Sopenharmony_cistatic int virtio_mem_sbm_plug_any_sb(struct virtio_mem *vm, 169562306a36Sopenharmony_ci unsigned long mb_id, uint64_t *nb_sb) 169662306a36Sopenharmony_ci{ 169762306a36Sopenharmony_ci const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id); 169862306a36Sopenharmony_ci unsigned long pfn, nr_pages; 169962306a36Sopenharmony_ci int sb_id, count; 170062306a36Sopenharmony_ci int rc; 170162306a36Sopenharmony_ci 170262306a36Sopenharmony_ci if (WARN_ON_ONCE(!*nb_sb)) 170362306a36Sopenharmony_ci return -EINVAL; 170462306a36Sopenharmony_ci 170562306a36Sopenharmony_ci while (*nb_sb) { 170662306a36Sopenharmony_ci sb_id = virtio_mem_sbm_first_unplugged_sb(vm, mb_id); 170762306a36Sopenharmony_ci if (sb_id >= vm->sbm.sbs_per_mb) 170862306a36Sopenharmony_ci break; 170962306a36Sopenharmony_ci count = 1; 171062306a36Sopenharmony_ci while (count < *nb_sb && 171162306a36Sopenharmony_ci sb_id + count < vm->sbm.sbs_per_mb && 171262306a36Sopenharmony_ci !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id + count, 1)) 171362306a36Sopenharmony_ci count++; 171462306a36Sopenharmony_ci 171562306a36Sopenharmony_ci rc = virtio_mem_sbm_plug_sb(vm, mb_id, sb_id, count); 171662306a36Sopenharmony_ci if (rc) 171762306a36Sopenharmony_ci return rc; 171862306a36Sopenharmony_ci *nb_sb -= count; 171962306a36Sopenharmony_ci if (old_state == VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) 172062306a36Sopenharmony_ci continue; 172162306a36Sopenharmony_ci 172262306a36Sopenharmony_ci /* fake-online the pages if the memory block is online */ 172362306a36Sopenharmony_ci pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + 172462306a36Sopenharmony_ci sb_id * vm->sbm.sb_size); 172562306a36Sopenharmony_ci nr_pages = PFN_DOWN(count * vm->sbm.sb_size); 172662306a36Sopenharmony_ci virtio_mem_fake_online(pfn, nr_pages); 172762306a36Sopenharmony_ci } 172862306a36Sopenharmony_ci 172962306a36Sopenharmony_ci if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) 173062306a36Sopenharmony_ci virtio_mem_sbm_set_mb_state(vm, mb_id, old_state - 1); 173162306a36Sopenharmony_ci 173262306a36Sopenharmony_ci return 0; 173362306a36Sopenharmony_ci} 173462306a36Sopenharmony_ci 173562306a36Sopenharmony_cistatic int virtio_mem_sbm_plug_request(struct virtio_mem *vm, uint64_t diff) 173662306a36Sopenharmony_ci{ 173762306a36Sopenharmony_ci const int mb_states[] = { 173862306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL, 173962306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL, 174062306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL, 174162306a36Sopenharmony_ci }; 174262306a36Sopenharmony_ci uint64_t nb_sb = diff / vm->sbm.sb_size; 174362306a36Sopenharmony_ci unsigned long mb_id; 174462306a36Sopenharmony_ci int rc, i; 174562306a36Sopenharmony_ci 174662306a36Sopenharmony_ci if (!nb_sb) 174762306a36Sopenharmony_ci return 0; 174862306a36Sopenharmony_ci 174962306a36Sopenharmony_ci /* Don't race with onlining/offlining */ 175062306a36Sopenharmony_ci mutex_lock(&vm->hotplug_mutex); 175162306a36Sopenharmony_ci 175262306a36Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(mb_states); i++) { 175362306a36Sopenharmony_ci virtio_mem_sbm_for_each_mb(vm, mb_id, mb_states[i]) { 175462306a36Sopenharmony_ci rc = virtio_mem_sbm_plug_any_sb(vm, mb_id, &nb_sb); 175562306a36Sopenharmony_ci if (rc || !nb_sb) 175662306a36Sopenharmony_ci goto out_unlock; 175762306a36Sopenharmony_ci cond_resched(); 175862306a36Sopenharmony_ci } 175962306a36Sopenharmony_ci } 176062306a36Sopenharmony_ci 176162306a36Sopenharmony_ci /* 176262306a36Sopenharmony_ci * We won't be working on online/offline memory blocks from this point, 176362306a36Sopenharmony_ci * so we can't race with memory onlining/offlining. Drop the mutex. 176462306a36Sopenharmony_ci */ 176562306a36Sopenharmony_ci mutex_unlock(&vm->hotplug_mutex); 176662306a36Sopenharmony_ci 176762306a36Sopenharmony_ci /* Try to plug and add unused blocks */ 176862306a36Sopenharmony_ci virtio_mem_sbm_for_each_mb(vm, mb_id, VIRTIO_MEM_SBM_MB_UNUSED) { 176962306a36Sopenharmony_ci if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes())) 177062306a36Sopenharmony_ci return -ENOSPC; 177162306a36Sopenharmony_ci 177262306a36Sopenharmony_ci rc = virtio_mem_sbm_plug_and_add_mb(vm, mb_id, &nb_sb); 177362306a36Sopenharmony_ci if (rc || !nb_sb) 177462306a36Sopenharmony_ci return rc; 177562306a36Sopenharmony_ci cond_resched(); 177662306a36Sopenharmony_ci } 177762306a36Sopenharmony_ci 177862306a36Sopenharmony_ci /* Try to prepare, plug and add new blocks */ 177962306a36Sopenharmony_ci while (nb_sb) { 178062306a36Sopenharmony_ci if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes())) 178162306a36Sopenharmony_ci return -ENOSPC; 178262306a36Sopenharmony_ci 178362306a36Sopenharmony_ci rc = virtio_mem_sbm_prepare_next_mb(vm, &mb_id); 178462306a36Sopenharmony_ci if (rc) 178562306a36Sopenharmony_ci return rc; 178662306a36Sopenharmony_ci rc = virtio_mem_sbm_plug_and_add_mb(vm, mb_id, &nb_sb); 178762306a36Sopenharmony_ci if (rc) 178862306a36Sopenharmony_ci return rc; 178962306a36Sopenharmony_ci cond_resched(); 179062306a36Sopenharmony_ci } 179162306a36Sopenharmony_ci 179262306a36Sopenharmony_ci return 0; 179362306a36Sopenharmony_ciout_unlock: 179462306a36Sopenharmony_ci mutex_unlock(&vm->hotplug_mutex); 179562306a36Sopenharmony_ci return rc; 179662306a36Sopenharmony_ci} 179762306a36Sopenharmony_ci 179862306a36Sopenharmony_ci/* 179962306a36Sopenharmony_ci * Plug a big block and add it to Linux. 180062306a36Sopenharmony_ci * 180162306a36Sopenharmony_ci * Will modify the state of the big block. 180262306a36Sopenharmony_ci */ 180362306a36Sopenharmony_cistatic int virtio_mem_bbm_plug_and_add_bb(struct virtio_mem *vm, 180462306a36Sopenharmony_ci unsigned long bb_id) 180562306a36Sopenharmony_ci{ 180662306a36Sopenharmony_ci int rc; 180762306a36Sopenharmony_ci 180862306a36Sopenharmony_ci if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) != 180962306a36Sopenharmony_ci VIRTIO_MEM_BBM_BB_UNUSED)) 181062306a36Sopenharmony_ci return -EINVAL; 181162306a36Sopenharmony_ci 181262306a36Sopenharmony_ci rc = virtio_mem_bbm_plug_bb(vm, bb_id); 181362306a36Sopenharmony_ci if (rc) 181462306a36Sopenharmony_ci return rc; 181562306a36Sopenharmony_ci virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED); 181662306a36Sopenharmony_ci 181762306a36Sopenharmony_ci rc = virtio_mem_bbm_add_bb(vm, bb_id); 181862306a36Sopenharmony_ci if (rc) { 181962306a36Sopenharmony_ci if (!virtio_mem_bbm_unplug_bb(vm, bb_id)) 182062306a36Sopenharmony_ci virtio_mem_bbm_set_bb_state(vm, bb_id, 182162306a36Sopenharmony_ci VIRTIO_MEM_BBM_BB_UNUSED); 182262306a36Sopenharmony_ci else 182362306a36Sopenharmony_ci /* Retry from the main loop. */ 182462306a36Sopenharmony_ci virtio_mem_bbm_set_bb_state(vm, bb_id, 182562306a36Sopenharmony_ci VIRTIO_MEM_BBM_BB_PLUGGED); 182662306a36Sopenharmony_ci return rc; 182762306a36Sopenharmony_ci } 182862306a36Sopenharmony_ci return 0; 182962306a36Sopenharmony_ci} 183062306a36Sopenharmony_ci 183162306a36Sopenharmony_ci/* 183262306a36Sopenharmony_ci * Prepare tracking data for the next big block. 183362306a36Sopenharmony_ci */ 183462306a36Sopenharmony_cistatic int virtio_mem_bbm_prepare_next_bb(struct virtio_mem *vm, 183562306a36Sopenharmony_ci unsigned long *bb_id) 183662306a36Sopenharmony_ci{ 183762306a36Sopenharmony_ci int rc; 183862306a36Sopenharmony_ci 183962306a36Sopenharmony_ci if (vm->bbm.next_bb_id > vm->bbm.last_usable_bb_id) 184062306a36Sopenharmony_ci return -ENOSPC; 184162306a36Sopenharmony_ci 184262306a36Sopenharmony_ci /* Resize the big block state array if required. */ 184362306a36Sopenharmony_ci rc = virtio_mem_bbm_bb_states_prepare_next_bb(vm); 184462306a36Sopenharmony_ci if (rc) 184562306a36Sopenharmony_ci return rc; 184662306a36Sopenharmony_ci 184762306a36Sopenharmony_ci vm->bbm.bb_count[VIRTIO_MEM_BBM_BB_UNUSED]++; 184862306a36Sopenharmony_ci *bb_id = vm->bbm.next_bb_id; 184962306a36Sopenharmony_ci vm->bbm.next_bb_id++; 185062306a36Sopenharmony_ci return 0; 185162306a36Sopenharmony_ci} 185262306a36Sopenharmony_ci 185362306a36Sopenharmony_cistatic int virtio_mem_bbm_plug_request(struct virtio_mem *vm, uint64_t diff) 185462306a36Sopenharmony_ci{ 185562306a36Sopenharmony_ci uint64_t nb_bb = diff / vm->bbm.bb_size; 185662306a36Sopenharmony_ci unsigned long bb_id; 185762306a36Sopenharmony_ci int rc; 185862306a36Sopenharmony_ci 185962306a36Sopenharmony_ci if (!nb_bb) 186062306a36Sopenharmony_ci return 0; 186162306a36Sopenharmony_ci 186262306a36Sopenharmony_ci /* Try to plug and add unused big blocks */ 186362306a36Sopenharmony_ci virtio_mem_bbm_for_each_bb(vm, bb_id, VIRTIO_MEM_BBM_BB_UNUSED) { 186462306a36Sopenharmony_ci if (!virtio_mem_could_add_memory(vm, vm->bbm.bb_size)) 186562306a36Sopenharmony_ci return -ENOSPC; 186662306a36Sopenharmony_ci 186762306a36Sopenharmony_ci rc = virtio_mem_bbm_plug_and_add_bb(vm, bb_id); 186862306a36Sopenharmony_ci if (!rc) 186962306a36Sopenharmony_ci nb_bb--; 187062306a36Sopenharmony_ci if (rc || !nb_bb) 187162306a36Sopenharmony_ci return rc; 187262306a36Sopenharmony_ci cond_resched(); 187362306a36Sopenharmony_ci } 187462306a36Sopenharmony_ci 187562306a36Sopenharmony_ci /* Try to prepare, plug and add new big blocks */ 187662306a36Sopenharmony_ci while (nb_bb) { 187762306a36Sopenharmony_ci if (!virtio_mem_could_add_memory(vm, vm->bbm.bb_size)) 187862306a36Sopenharmony_ci return -ENOSPC; 187962306a36Sopenharmony_ci 188062306a36Sopenharmony_ci rc = virtio_mem_bbm_prepare_next_bb(vm, &bb_id); 188162306a36Sopenharmony_ci if (rc) 188262306a36Sopenharmony_ci return rc; 188362306a36Sopenharmony_ci rc = virtio_mem_bbm_plug_and_add_bb(vm, bb_id); 188462306a36Sopenharmony_ci if (!rc) 188562306a36Sopenharmony_ci nb_bb--; 188662306a36Sopenharmony_ci if (rc) 188762306a36Sopenharmony_ci return rc; 188862306a36Sopenharmony_ci cond_resched(); 188962306a36Sopenharmony_ci } 189062306a36Sopenharmony_ci 189162306a36Sopenharmony_ci return 0; 189262306a36Sopenharmony_ci} 189362306a36Sopenharmony_ci 189462306a36Sopenharmony_ci/* 189562306a36Sopenharmony_ci * Try to plug the requested amount of memory. 189662306a36Sopenharmony_ci */ 189762306a36Sopenharmony_cistatic int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) 189862306a36Sopenharmony_ci{ 189962306a36Sopenharmony_ci if (vm->in_sbm) 190062306a36Sopenharmony_ci return virtio_mem_sbm_plug_request(vm, diff); 190162306a36Sopenharmony_ci return virtio_mem_bbm_plug_request(vm, diff); 190262306a36Sopenharmony_ci} 190362306a36Sopenharmony_ci 190462306a36Sopenharmony_ci/* 190562306a36Sopenharmony_ci * Unplug the desired number of plugged subblocks of an offline memory block. 190662306a36Sopenharmony_ci * Will fail if any subblock cannot get unplugged (instead of skipping it). 190762306a36Sopenharmony_ci * 190862306a36Sopenharmony_ci * Will modify the state of the memory block. Might temporarily drop the 190962306a36Sopenharmony_ci * hotplug_mutex. 191062306a36Sopenharmony_ci * 191162306a36Sopenharmony_ci * Note: Can fail after some subblocks were successfully unplugged. 191262306a36Sopenharmony_ci */ 191362306a36Sopenharmony_cistatic int virtio_mem_sbm_unplug_any_sb_offline(struct virtio_mem *vm, 191462306a36Sopenharmony_ci unsigned long mb_id, 191562306a36Sopenharmony_ci uint64_t *nb_sb) 191662306a36Sopenharmony_ci{ 191762306a36Sopenharmony_ci int rc; 191862306a36Sopenharmony_ci 191962306a36Sopenharmony_ci rc = virtio_mem_sbm_unplug_any_sb_raw(vm, mb_id, nb_sb); 192062306a36Sopenharmony_ci 192162306a36Sopenharmony_ci /* some subblocks might have been unplugged even on failure */ 192262306a36Sopenharmony_ci if (!virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) 192362306a36Sopenharmony_ci virtio_mem_sbm_set_mb_state(vm, mb_id, 192462306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL); 192562306a36Sopenharmony_ci if (rc) 192662306a36Sopenharmony_ci return rc; 192762306a36Sopenharmony_ci 192862306a36Sopenharmony_ci if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { 192962306a36Sopenharmony_ci /* 193062306a36Sopenharmony_ci * Remove the block from Linux - this should never fail. 193162306a36Sopenharmony_ci * Hinder the block from getting onlined by marking it 193262306a36Sopenharmony_ci * unplugged. Temporarily drop the mutex, so 193362306a36Sopenharmony_ci * any pending GOING_ONLINE requests can be serviced/rejected. 193462306a36Sopenharmony_ci */ 193562306a36Sopenharmony_ci virtio_mem_sbm_set_mb_state(vm, mb_id, 193662306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_UNUSED); 193762306a36Sopenharmony_ci 193862306a36Sopenharmony_ci mutex_unlock(&vm->hotplug_mutex); 193962306a36Sopenharmony_ci rc = virtio_mem_sbm_remove_mb(vm, mb_id); 194062306a36Sopenharmony_ci BUG_ON(rc); 194162306a36Sopenharmony_ci mutex_lock(&vm->hotplug_mutex); 194262306a36Sopenharmony_ci } 194362306a36Sopenharmony_ci return 0; 194462306a36Sopenharmony_ci} 194562306a36Sopenharmony_ci 194662306a36Sopenharmony_ci/* 194762306a36Sopenharmony_ci * Unplug the given plugged subblocks of an online memory block. 194862306a36Sopenharmony_ci * 194962306a36Sopenharmony_ci * Will modify the state of the memory block. 195062306a36Sopenharmony_ci */ 195162306a36Sopenharmony_cistatic int virtio_mem_sbm_unplug_sb_online(struct virtio_mem *vm, 195262306a36Sopenharmony_ci unsigned long mb_id, int sb_id, 195362306a36Sopenharmony_ci int count) 195462306a36Sopenharmony_ci{ 195562306a36Sopenharmony_ci const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size) * count; 195662306a36Sopenharmony_ci const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id); 195762306a36Sopenharmony_ci unsigned long start_pfn; 195862306a36Sopenharmony_ci int rc; 195962306a36Sopenharmony_ci 196062306a36Sopenharmony_ci start_pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + 196162306a36Sopenharmony_ci sb_id * vm->sbm.sb_size); 196262306a36Sopenharmony_ci 196362306a36Sopenharmony_ci rc = virtio_mem_fake_offline(vm, start_pfn, nr_pages); 196462306a36Sopenharmony_ci if (rc) 196562306a36Sopenharmony_ci return rc; 196662306a36Sopenharmony_ci 196762306a36Sopenharmony_ci /* Try to unplug the allocated memory */ 196862306a36Sopenharmony_ci rc = virtio_mem_sbm_unplug_sb(vm, mb_id, sb_id, count); 196962306a36Sopenharmony_ci if (rc) { 197062306a36Sopenharmony_ci /* Return the memory to the buddy. */ 197162306a36Sopenharmony_ci virtio_mem_fake_online(start_pfn, nr_pages); 197262306a36Sopenharmony_ci return rc; 197362306a36Sopenharmony_ci } 197462306a36Sopenharmony_ci 197562306a36Sopenharmony_ci switch (old_state) { 197662306a36Sopenharmony_ci case VIRTIO_MEM_SBM_MB_KERNEL: 197762306a36Sopenharmony_ci virtio_mem_sbm_set_mb_state(vm, mb_id, 197862306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL); 197962306a36Sopenharmony_ci break; 198062306a36Sopenharmony_ci case VIRTIO_MEM_SBM_MB_MOVABLE: 198162306a36Sopenharmony_ci virtio_mem_sbm_set_mb_state(vm, mb_id, 198262306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL); 198362306a36Sopenharmony_ci break; 198462306a36Sopenharmony_ci } 198562306a36Sopenharmony_ci 198662306a36Sopenharmony_ci return 0; 198762306a36Sopenharmony_ci} 198862306a36Sopenharmony_ci 198962306a36Sopenharmony_ci/* 199062306a36Sopenharmony_ci * Unplug the desired number of plugged subblocks of an online memory block. 199162306a36Sopenharmony_ci * Will skip subblock that are busy. 199262306a36Sopenharmony_ci * 199362306a36Sopenharmony_ci * Will modify the state of the memory block. Might temporarily drop the 199462306a36Sopenharmony_ci * hotplug_mutex. 199562306a36Sopenharmony_ci * 199662306a36Sopenharmony_ci * Note: Can fail after some subblocks were successfully unplugged. Can 199762306a36Sopenharmony_ci * return 0 even if subblocks were busy and could not get unplugged. 199862306a36Sopenharmony_ci */ 199962306a36Sopenharmony_cistatic int virtio_mem_sbm_unplug_any_sb_online(struct virtio_mem *vm, 200062306a36Sopenharmony_ci unsigned long mb_id, 200162306a36Sopenharmony_ci uint64_t *nb_sb) 200262306a36Sopenharmony_ci{ 200362306a36Sopenharmony_ci int rc, sb_id; 200462306a36Sopenharmony_ci 200562306a36Sopenharmony_ci /* If possible, try to unplug the complete block in one shot. */ 200662306a36Sopenharmony_ci if (*nb_sb >= vm->sbm.sbs_per_mb && 200762306a36Sopenharmony_ci virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { 200862306a36Sopenharmony_ci rc = virtio_mem_sbm_unplug_sb_online(vm, mb_id, 0, 200962306a36Sopenharmony_ci vm->sbm.sbs_per_mb); 201062306a36Sopenharmony_ci if (!rc) { 201162306a36Sopenharmony_ci *nb_sb -= vm->sbm.sbs_per_mb; 201262306a36Sopenharmony_ci goto unplugged; 201362306a36Sopenharmony_ci } else if (rc != -EBUSY) 201462306a36Sopenharmony_ci return rc; 201562306a36Sopenharmony_ci } 201662306a36Sopenharmony_ci 201762306a36Sopenharmony_ci /* Fallback to single subblocks. */ 201862306a36Sopenharmony_ci for (sb_id = vm->sbm.sbs_per_mb - 1; sb_id >= 0 && *nb_sb; sb_id--) { 201962306a36Sopenharmony_ci /* Find the next candidate subblock */ 202062306a36Sopenharmony_ci while (sb_id >= 0 && 202162306a36Sopenharmony_ci !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) 202262306a36Sopenharmony_ci sb_id--; 202362306a36Sopenharmony_ci if (sb_id < 0) 202462306a36Sopenharmony_ci break; 202562306a36Sopenharmony_ci 202662306a36Sopenharmony_ci rc = virtio_mem_sbm_unplug_sb_online(vm, mb_id, sb_id, 1); 202762306a36Sopenharmony_ci if (rc == -EBUSY) 202862306a36Sopenharmony_ci continue; 202962306a36Sopenharmony_ci else if (rc) 203062306a36Sopenharmony_ci return rc; 203162306a36Sopenharmony_ci *nb_sb -= 1; 203262306a36Sopenharmony_ci } 203362306a36Sopenharmony_ci 203462306a36Sopenharmony_ciunplugged: 203562306a36Sopenharmony_ci rc = virtio_mem_sbm_try_remove_unplugged_mb(vm, mb_id); 203662306a36Sopenharmony_ci if (rc) 203762306a36Sopenharmony_ci vm->sbm.have_unplugged_mb = 1; 203862306a36Sopenharmony_ci /* Ignore errors, this is not critical. We'll retry later. */ 203962306a36Sopenharmony_ci return 0; 204062306a36Sopenharmony_ci} 204162306a36Sopenharmony_ci 204262306a36Sopenharmony_ci/* 204362306a36Sopenharmony_ci * Unplug the desired number of plugged subblocks of a memory block that is 204462306a36Sopenharmony_ci * already added to Linux. Will skip subblock of online memory blocks that are 204562306a36Sopenharmony_ci * busy (by the OS). Will fail if any subblock that's not busy cannot get 204662306a36Sopenharmony_ci * unplugged. 204762306a36Sopenharmony_ci * 204862306a36Sopenharmony_ci * Will modify the state of the memory block. Might temporarily drop the 204962306a36Sopenharmony_ci * hotplug_mutex. 205062306a36Sopenharmony_ci * 205162306a36Sopenharmony_ci * Note: Can fail after some subblocks were successfully unplugged. Can 205262306a36Sopenharmony_ci * return 0 even if subblocks were busy and could not get unplugged. 205362306a36Sopenharmony_ci */ 205462306a36Sopenharmony_cistatic int virtio_mem_sbm_unplug_any_sb(struct virtio_mem *vm, 205562306a36Sopenharmony_ci unsigned long mb_id, 205662306a36Sopenharmony_ci uint64_t *nb_sb) 205762306a36Sopenharmony_ci{ 205862306a36Sopenharmony_ci const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id); 205962306a36Sopenharmony_ci 206062306a36Sopenharmony_ci switch (old_state) { 206162306a36Sopenharmony_ci case VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL: 206262306a36Sopenharmony_ci case VIRTIO_MEM_SBM_MB_KERNEL: 206362306a36Sopenharmony_ci case VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL: 206462306a36Sopenharmony_ci case VIRTIO_MEM_SBM_MB_MOVABLE: 206562306a36Sopenharmony_ci return virtio_mem_sbm_unplug_any_sb_online(vm, mb_id, nb_sb); 206662306a36Sopenharmony_ci case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL: 206762306a36Sopenharmony_ci case VIRTIO_MEM_SBM_MB_OFFLINE: 206862306a36Sopenharmony_ci return virtio_mem_sbm_unplug_any_sb_offline(vm, mb_id, nb_sb); 206962306a36Sopenharmony_ci } 207062306a36Sopenharmony_ci return -EINVAL; 207162306a36Sopenharmony_ci} 207262306a36Sopenharmony_ci 207362306a36Sopenharmony_cistatic int virtio_mem_sbm_unplug_request(struct virtio_mem *vm, uint64_t diff) 207462306a36Sopenharmony_ci{ 207562306a36Sopenharmony_ci const int mb_states[] = { 207662306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL, 207762306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_OFFLINE, 207862306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL, 207962306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL, 208062306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_MOVABLE, 208162306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_KERNEL, 208262306a36Sopenharmony_ci }; 208362306a36Sopenharmony_ci uint64_t nb_sb = diff / vm->sbm.sb_size; 208462306a36Sopenharmony_ci unsigned long mb_id; 208562306a36Sopenharmony_ci int rc, i; 208662306a36Sopenharmony_ci 208762306a36Sopenharmony_ci if (!nb_sb) 208862306a36Sopenharmony_ci return 0; 208962306a36Sopenharmony_ci 209062306a36Sopenharmony_ci /* 209162306a36Sopenharmony_ci * We'll drop the mutex a couple of times when it is safe to do so. 209262306a36Sopenharmony_ci * This might result in some blocks switching the state (online/offline) 209362306a36Sopenharmony_ci * and we could miss them in this run - we will retry again later. 209462306a36Sopenharmony_ci */ 209562306a36Sopenharmony_ci mutex_lock(&vm->hotplug_mutex); 209662306a36Sopenharmony_ci 209762306a36Sopenharmony_ci /* 209862306a36Sopenharmony_ci * We try unplug from partially plugged blocks first, to try removing 209962306a36Sopenharmony_ci * whole memory blocks along with metadata. We prioritize ZONE_MOVABLE 210062306a36Sopenharmony_ci * as it's more reliable to unplug memory and remove whole memory 210162306a36Sopenharmony_ci * blocks, and we don't want to trigger a zone imbalances by 210262306a36Sopenharmony_ci * accidentially removing too much kernel memory. 210362306a36Sopenharmony_ci */ 210462306a36Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(mb_states); i++) { 210562306a36Sopenharmony_ci virtio_mem_sbm_for_each_mb_rev(vm, mb_id, mb_states[i]) { 210662306a36Sopenharmony_ci rc = virtio_mem_sbm_unplug_any_sb(vm, mb_id, &nb_sb); 210762306a36Sopenharmony_ci if (rc || !nb_sb) 210862306a36Sopenharmony_ci goto out_unlock; 210962306a36Sopenharmony_ci mutex_unlock(&vm->hotplug_mutex); 211062306a36Sopenharmony_ci cond_resched(); 211162306a36Sopenharmony_ci mutex_lock(&vm->hotplug_mutex); 211262306a36Sopenharmony_ci } 211362306a36Sopenharmony_ci if (!unplug_online && i == 1) { 211462306a36Sopenharmony_ci mutex_unlock(&vm->hotplug_mutex); 211562306a36Sopenharmony_ci return 0; 211662306a36Sopenharmony_ci } 211762306a36Sopenharmony_ci } 211862306a36Sopenharmony_ci 211962306a36Sopenharmony_ci mutex_unlock(&vm->hotplug_mutex); 212062306a36Sopenharmony_ci return nb_sb ? -EBUSY : 0; 212162306a36Sopenharmony_ciout_unlock: 212262306a36Sopenharmony_ci mutex_unlock(&vm->hotplug_mutex); 212362306a36Sopenharmony_ci return rc; 212462306a36Sopenharmony_ci} 212562306a36Sopenharmony_ci 212662306a36Sopenharmony_ci/* 212762306a36Sopenharmony_ci * Try to offline and remove a big block from Linux and unplug it. Will fail 212862306a36Sopenharmony_ci * with -EBUSY if some memory is busy and cannot get unplugged. 212962306a36Sopenharmony_ci * 213062306a36Sopenharmony_ci * Will modify the state of the memory block. Might temporarily drop the 213162306a36Sopenharmony_ci * hotplug_mutex. 213262306a36Sopenharmony_ci */ 213362306a36Sopenharmony_cistatic int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm, 213462306a36Sopenharmony_ci unsigned long bb_id) 213562306a36Sopenharmony_ci{ 213662306a36Sopenharmony_ci const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id)); 213762306a36Sopenharmony_ci const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size); 213862306a36Sopenharmony_ci unsigned long end_pfn = start_pfn + nr_pages; 213962306a36Sopenharmony_ci unsigned long pfn; 214062306a36Sopenharmony_ci struct page *page; 214162306a36Sopenharmony_ci int rc; 214262306a36Sopenharmony_ci 214362306a36Sopenharmony_ci if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) != 214462306a36Sopenharmony_ci VIRTIO_MEM_BBM_BB_ADDED)) 214562306a36Sopenharmony_ci return -EINVAL; 214662306a36Sopenharmony_ci 214762306a36Sopenharmony_ci /* 214862306a36Sopenharmony_ci * Start by fake-offlining all memory. Once we marked the device 214962306a36Sopenharmony_ci * block as fake-offline, all newly onlined memory will 215062306a36Sopenharmony_ci * automatically be kept fake-offline. Protect from concurrent 215162306a36Sopenharmony_ci * onlining/offlining until we have a consistent state. 215262306a36Sopenharmony_ci */ 215362306a36Sopenharmony_ci mutex_lock(&vm->hotplug_mutex); 215462306a36Sopenharmony_ci virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_FAKE_OFFLINE); 215562306a36Sopenharmony_ci 215662306a36Sopenharmony_ci for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { 215762306a36Sopenharmony_ci page = pfn_to_online_page(pfn); 215862306a36Sopenharmony_ci if (!page) 215962306a36Sopenharmony_ci continue; 216062306a36Sopenharmony_ci 216162306a36Sopenharmony_ci rc = virtio_mem_fake_offline(vm, pfn, PAGES_PER_SECTION); 216262306a36Sopenharmony_ci if (rc) { 216362306a36Sopenharmony_ci end_pfn = pfn; 216462306a36Sopenharmony_ci goto rollback; 216562306a36Sopenharmony_ci } 216662306a36Sopenharmony_ci } 216762306a36Sopenharmony_ci mutex_unlock(&vm->hotplug_mutex); 216862306a36Sopenharmony_ci 216962306a36Sopenharmony_ci rc = virtio_mem_bbm_offline_and_remove_bb(vm, bb_id); 217062306a36Sopenharmony_ci if (rc) { 217162306a36Sopenharmony_ci mutex_lock(&vm->hotplug_mutex); 217262306a36Sopenharmony_ci goto rollback; 217362306a36Sopenharmony_ci } 217462306a36Sopenharmony_ci 217562306a36Sopenharmony_ci rc = virtio_mem_bbm_unplug_bb(vm, bb_id); 217662306a36Sopenharmony_ci if (rc) 217762306a36Sopenharmony_ci virtio_mem_bbm_set_bb_state(vm, bb_id, 217862306a36Sopenharmony_ci VIRTIO_MEM_BBM_BB_PLUGGED); 217962306a36Sopenharmony_ci else 218062306a36Sopenharmony_ci virtio_mem_bbm_set_bb_state(vm, bb_id, 218162306a36Sopenharmony_ci VIRTIO_MEM_BBM_BB_UNUSED); 218262306a36Sopenharmony_ci return rc; 218362306a36Sopenharmony_ci 218462306a36Sopenharmony_cirollback: 218562306a36Sopenharmony_ci for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { 218662306a36Sopenharmony_ci page = pfn_to_online_page(pfn); 218762306a36Sopenharmony_ci if (!page) 218862306a36Sopenharmony_ci continue; 218962306a36Sopenharmony_ci virtio_mem_fake_online(pfn, PAGES_PER_SECTION); 219062306a36Sopenharmony_ci } 219162306a36Sopenharmony_ci virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED); 219262306a36Sopenharmony_ci mutex_unlock(&vm->hotplug_mutex); 219362306a36Sopenharmony_ci return rc; 219462306a36Sopenharmony_ci} 219562306a36Sopenharmony_ci 219662306a36Sopenharmony_ci/* 219762306a36Sopenharmony_ci * Test if a big block is completely offline. 219862306a36Sopenharmony_ci */ 219962306a36Sopenharmony_cistatic bool virtio_mem_bbm_bb_is_offline(struct virtio_mem *vm, 220062306a36Sopenharmony_ci unsigned long bb_id) 220162306a36Sopenharmony_ci{ 220262306a36Sopenharmony_ci const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id)); 220362306a36Sopenharmony_ci const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size); 220462306a36Sopenharmony_ci unsigned long pfn; 220562306a36Sopenharmony_ci 220662306a36Sopenharmony_ci for (pfn = start_pfn; pfn < start_pfn + nr_pages; 220762306a36Sopenharmony_ci pfn += PAGES_PER_SECTION) { 220862306a36Sopenharmony_ci if (pfn_to_online_page(pfn)) 220962306a36Sopenharmony_ci return false; 221062306a36Sopenharmony_ci } 221162306a36Sopenharmony_ci 221262306a36Sopenharmony_ci return true; 221362306a36Sopenharmony_ci} 221462306a36Sopenharmony_ci 221562306a36Sopenharmony_ci/* 221662306a36Sopenharmony_ci * Test if a big block is completely onlined to ZONE_MOVABLE (or offline). 221762306a36Sopenharmony_ci */ 221862306a36Sopenharmony_cistatic bool virtio_mem_bbm_bb_is_movable(struct virtio_mem *vm, 221962306a36Sopenharmony_ci unsigned long bb_id) 222062306a36Sopenharmony_ci{ 222162306a36Sopenharmony_ci const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id)); 222262306a36Sopenharmony_ci const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size); 222362306a36Sopenharmony_ci struct page *page; 222462306a36Sopenharmony_ci unsigned long pfn; 222562306a36Sopenharmony_ci 222662306a36Sopenharmony_ci for (pfn = start_pfn; pfn < start_pfn + nr_pages; 222762306a36Sopenharmony_ci pfn += PAGES_PER_SECTION) { 222862306a36Sopenharmony_ci page = pfn_to_online_page(pfn); 222962306a36Sopenharmony_ci if (!page) 223062306a36Sopenharmony_ci continue; 223162306a36Sopenharmony_ci if (page_zonenum(page) != ZONE_MOVABLE) 223262306a36Sopenharmony_ci return false; 223362306a36Sopenharmony_ci } 223462306a36Sopenharmony_ci 223562306a36Sopenharmony_ci return true; 223662306a36Sopenharmony_ci} 223762306a36Sopenharmony_ci 223862306a36Sopenharmony_cistatic int virtio_mem_bbm_unplug_request(struct virtio_mem *vm, uint64_t diff) 223962306a36Sopenharmony_ci{ 224062306a36Sopenharmony_ci uint64_t nb_bb = diff / vm->bbm.bb_size; 224162306a36Sopenharmony_ci uint64_t bb_id; 224262306a36Sopenharmony_ci int rc, i; 224362306a36Sopenharmony_ci 224462306a36Sopenharmony_ci if (!nb_bb) 224562306a36Sopenharmony_ci return 0; 224662306a36Sopenharmony_ci 224762306a36Sopenharmony_ci /* 224862306a36Sopenharmony_ci * Try to unplug big blocks. Similar to SBM, start with offline 224962306a36Sopenharmony_ci * big blocks. 225062306a36Sopenharmony_ci */ 225162306a36Sopenharmony_ci for (i = 0; i < 3; i++) { 225262306a36Sopenharmony_ci virtio_mem_bbm_for_each_bb_rev(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) { 225362306a36Sopenharmony_ci cond_resched(); 225462306a36Sopenharmony_ci 225562306a36Sopenharmony_ci /* 225662306a36Sopenharmony_ci * As we're holding no locks, these checks are racy, 225762306a36Sopenharmony_ci * but we don't care. 225862306a36Sopenharmony_ci */ 225962306a36Sopenharmony_ci if (i == 0 && !virtio_mem_bbm_bb_is_offline(vm, bb_id)) 226062306a36Sopenharmony_ci continue; 226162306a36Sopenharmony_ci if (i == 1 && !virtio_mem_bbm_bb_is_movable(vm, bb_id)) 226262306a36Sopenharmony_ci continue; 226362306a36Sopenharmony_ci rc = virtio_mem_bbm_offline_remove_and_unplug_bb(vm, bb_id); 226462306a36Sopenharmony_ci if (rc == -EBUSY) 226562306a36Sopenharmony_ci continue; 226662306a36Sopenharmony_ci if (!rc) 226762306a36Sopenharmony_ci nb_bb--; 226862306a36Sopenharmony_ci if (rc || !nb_bb) 226962306a36Sopenharmony_ci return rc; 227062306a36Sopenharmony_ci } 227162306a36Sopenharmony_ci if (i == 0 && !unplug_online) 227262306a36Sopenharmony_ci return 0; 227362306a36Sopenharmony_ci } 227462306a36Sopenharmony_ci 227562306a36Sopenharmony_ci return nb_bb ? -EBUSY : 0; 227662306a36Sopenharmony_ci} 227762306a36Sopenharmony_ci 227862306a36Sopenharmony_ci/* 227962306a36Sopenharmony_ci * Try to unplug the requested amount of memory. 228062306a36Sopenharmony_ci */ 228162306a36Sopenharmony_cistatic int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) 228262306a36Sopenharmony_ci{ 228362306a36Sopenharmony_ci if (vm->in_sbm) 228462306a36Sopenharmony_ci return virtio_mem_sbm_unplug_request(vm, diff); 228562306a36Sopenharmony_ci return virtio_mem_bbm_unplug_request(vm, diff); 228662306a36Sopenharmony_ci} 228762306a36Sopenharmony_ci 228862306a36Sopenharmony_ci/* 228962306a36Sopenharmony_ci * Try to unplug all blocks that couldn't be unplugged before, for example, 229062306a36Sopenharmony_ci * because the hypervisor was busy. Further, offline and remove any memory 229162306a36Sopenharmony_ci * blocks where we previously failed. 229262306a36Sopenharmony_ci */ 229362306a36Sopenharmony_cistatic int virtio_mem_cleanup_pending_mb(struct virtio_mem *vm) 229462306a36Sopenharmony_ci{ 229562306a36Sopenharmony_ci unsigned long id; 229662306a36Sopenharmony_ci int rc = 0; 229762306a36Sopenharmony_ci 229862306a36Sopenharmony_ci if (!vm->in_sbm) { 229962306a36Sopenharmony_ci virtio_mem_bbm_for_each_bb(vm, id, 230062306a36Sopenharmony_ci VIRTIO_MEM_BBM_BB_PLUGGED) { 230162306a36Sopenharmony_ci rc = virtio_mem_bbm_unplug_bb(vm, id); 230262306a36Sopenharmony_ci if (rc) 230362306a36Sopenharmony_ci return rc; 230462306a36Sopenharmony_ci virtio_mem_bbm_set_bb_state(vm, id, 230562306a36Sopenharmony_ci VIRTIO_MEM_BBM_BB_UNUSED); 230662306a36Sopenharmony_ci } 230762306a36Sopenharmony_ci return 0; 230862306a36Sopenharmony_ci } 230962306a36Sopenharmony_ci 231062306a36Sopenharmony_ci virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_PLUGGED) { 231162306a36Sopenharmony_ci rc = virtio_mem_sbm_unplug_mb(vm, id); 231262306a36Sopenharmony_ci if (rc) 231362306a36Sopenharmony_ci return rc; 231462306a36Sopenharmony_ci virtio_mem_sbm_set_mb_state(vm, id, 231562306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_UNUSED); 231662306a36Sopenharmony_ci } 231762306a36Sopenharmony_ci 231862306a36Sopenharmony_ci if (!vm->sbm.have_unplugged_mb) 231962306a36Sopenharmony_ci return 0; 232062306a36Sopenharmony_ci 232162306a36Sopenharmony_ci /* 232262306a36Sopenharmony_ci * Let's retry (offlining and) removing completely unplugged Linux 232362306a36Sopenharmony_ci * memory blocks. 232462306a36Sopenharmony_ci */ 232562306a36Sopenharmony_ci vm->sbm.have_unplugged_mb = false; 232662306a36Sopenharmony_ci 232762306a36Sopenharmony_ci mutex_lock(&vm->hotplug_mutex); 232862306a36Sopenharmony_ci virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL) 232962306a36Sopenharmony_ci rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id); 233062306a36Sopenharmony_ci virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL) 233162306a36Sopenharmony_ci rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id); 233262306a36Sopenharmony_ci virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) 233362306a36Sopenharmony_ci rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id); 233462306a36Sopenharmony_ci mutex_unlock(&vm->hotplug_mutex); 233562306a36Sopenharmony_ci 233662306a36Sopenharmony_ci if (rc) 233762306a36Sopenharmony_ci vm->sbm.have_unplugged_mb = true; 233862306a36Sopenharmony_ci /* Ignore errors, this is not critical. We'll retry later. */ 233962306a36Sopenharmony_ci return 0; 234062306a36Sopenharmony_ci} 234162306a36Sopenharmony_ci 234262306a36Sopenharmony_ci/* 234362306a36Sopenharmony_ci * Update all parts of the config that could have changed. 234462306a36Sopenharmony_ci */ 234562306a36Sopenharmony_cistatic void virtio_mem_refresh_config(struct virtio_mem *vm) 234662306a36Sopenharmony_ci{ 234762306a36Sopenharmony_ci const struct range pluggable_range = mhp_get_pluggable_range(true); 234862306a36Sopenharmony_ci uint64_t new_plugged_size, usable_region_size, end_addr; 234962306a36Sopenharmony_ci 235062306a36Sopenharmony_ci /* the plugged_size is just a reflection of what _we_ did previously */ 235162306a36Sopenharmony_ci virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size, 235262306a36Sopenharmony_ci &new_plugged_size); 235362306a36Sopenharmony_ci if (WARN_ON_ONCE(new_plugged_size != vm->plugged_size)) 235462306a36Sopenharmony_ci vm->plugged_size = new_plugged_size; 235562306a36Sopenharmony_ci 235662306a36Sopenharmony_ci /* calculate the last usable memory block id */ 235762306a36Sopenharmony_ci virtio_cread_le(vm->vdev, struct virtio_mem_config, 235862306a36Sopenharmony_ci usable_region_size, &usable_region_size); 235962306a36Sopenharmony_ci end_addr = min(vm->addr + usable_region_size - 1, 236062306a36Sopenharmony_ci pluggable_range.end); 236162306a36Sopenharmony_ci 236262306a36Sopenharmony_ci if (vm->in_sbm) { 236362306a36Sopenharmony_ci vm->sbm.last_usable_mb_id = virtio_mem_phys_to_mb_id(end_addr); 236462306a36Sopenharmony_ci if (!IS_ALIGNED(end_addr + 1, memory_block_size_bytes())) 236562306a36Sopenharmony_ci vm->sbm.last_usable_mb_id--; 236662306a36Sopenharmony_ci } else { 236762306a36Sopenharmony_ci vm->bbm.last_usable_bb_id = virtio_mem_phys_to_bb_id(vm, 236862306a36Sopenharmony_ci end_addr); 236962306a36Sopenharmony_ci if (!IS_ALIGNED(end_addr + 1, vm->bbm.bb_size)) 237062306a36Sopenharmony_ci vm->bbm.last_usable_bb_id--; 237162306a36Sopenharmony_ci } 237262306a36Sopenharmony_ci /* 237362306a36Sopenharmony_ci * If we cannot plug any of our device memory (e.g., nothing in the 237462306a36Sopenharmony_ci * usable region is addressable), the last usable memory block id will 237562306a36Sopenharmony_ci * be smaller than the first usable memory block id. We'll stop 237662306a36Sopenharmony_ci * attempting to add memory with -ENOSPC from our main loop. 237762306a36Sopenharmony_ci */ 237862306a36Sopenharmony_ci 237962306a36Sopenharmony_ci /* see if there is a request to change the size */ 238062306a36Sopenharmony_ci virtio_cread_le(vm->vdev, struct virtio_mem_config, requested_size, 238162306a36Sopenharmony_ci &vm->requested_size); 238262306a36Sopenharmony_ci 238362306a36Sopenharmony_ci dev_info(&vm->vdev->dev, "plugged size: 0x%llx", vm->plugged_size); 238462306a36Sopenharmony_ci dev_info(&vm->vdev->dev, "requested size: 0x%llx", vm->requested_size); 238562306a36Sopenharmony_ci} 238662306a36Sopenharmony_ci 238762306a36Sopenharmony_ci/* 238862306a36Sopenharmony_ci * Workqueue function for handling plug/unplug requests and config updates. 238962306a36Sopenharmony_ci */ 239062306a36Sopenharmony_cistatic void virtio_mem_run_wq(struct work_struct *work) 239162306a36Sopenharmony_ci{ 239262306a36Sopenharmony_ci struct virtio_mem *vm = container_of(work, struct virtio_mem, wq); 239362306a36Sopenharmony_ci uint64_t diff; 239462306a36Sopenharmony_ci int rc; 239562306a36Sopenharmony_ci 239662306a36Sopenharmony_ci if (unlikely(vm->in_kdump)) { 239762306a36Sopenharmony_ci dev_warn_once(&vm->vdev->dev, 239862306a36Sopenharmony_ci "unexpected workqueue run in kdump kernel\n"); 239962306a36Sopenharmony_ci return; 240062306a36Sopenharmony_ci } 240162306a36Sopenharmony_ci 240262306a36Sopenharmony_ci hrtimer_cancel(&vm->retry_timer); 240362306a36Sopenharmony_ci 240462306a36Sopenharmony_ci if (vm->broken) 240562306a36Sopenharmony_ci return; 240662306a36Sopenharmony_ci 240762306a36Sopenharmony_ci atomic_set(&vm->wq_active, 1); 240862306a36Sopenharmony_ciretry: 240962306a36Sopenharmony_ci rc = 0; 241062306a36Sopenharmony_ci 241162306a36Sopenharmony_ci /* Make sure we start with a clean state if there are leftovers. */ 241262306a36Sopenharmony_ci if (unlikely(vm->unplug_all_required)) 241362306a36Sopenharmony_ci rc = virtio_mem_send_unplug_all_request(vm); 241462306a36Sopenharmony_ci 241562306a36Sopenharmony_ci if (atomic_read(&vm->config_changed)) { 241662306a36Sopenharmony_ci atomic_set(&vm->config_changed, 0); 241762306a36Sopenharmony_ci virtio_mem_refresh_config(vm); 241862306a36Sopenharmony_ci } 241962306a36Sopenharmony_ci 242062306a36Sopenharmony_ci /* Cleanup any leftovers from previous runs */ 242162306a36Sopenharmony_ci if (!rc) 242262306a36Sopenharmony_ci rc = virtio_mem_cleanup_pending_mb(vm); 242362306a36Sopenharmony_ci 242462306a36Sopenharmony_ci if (!rc && vm->requested_size != vm->plugged_size) { 242562306a36Sopenharmony_ci if (vm->requested_size > vm->plugged_size) { 242662306a36Sopenharmony_ci diff = vm->requested_size - vm->plugged_size; 242762306a36Sopenharmony_ci rc = virtio_mem_plug_request(vm, diff); 242862306a36Sopenharmony_ci } else { 242962306a36Sopenharmony_ci diff = vm->plugged_size - vm->requested_size; 243062306a36Sopenharmony_ci rc = virtio_mem_unplug_request(vm, diff); 243162306a36Sopenharmony_ci } 243262306a36Sopenharmony_ci } 243362306a36Sopenharmony_ci 243462306a36Sopenharmony_ci /* 243562306a36Sopenharmony_ci * Keep retrying to offline and remove completely unplugged Linux 243662306a36Sopenharmony_ci * memory blocks. 243762306a36Sopenharmony_ci */ 243862306a36Sopenharmony_ci if (!rc && vm->in_sbm && vm->sbm.have_unplugged_mb) 243962306a36Sopenharmony_ci rc = -EBUSY; 244062306a36Sopenharmony_ci 244162306a36Sopenharmony_ci switch (rc) { 244262306a36Sopenharmony_ci case 0: 244362306a36Sopenharmony_ci vm->retry_timer_ms = VIRTIO_MEM_RETRY_TIMER_MIN_MS; 244462306a36Sopenharmony_ci break; 244562306a36Sopenharmony_ci case -ENOSPC: 244662306a36Sopenharmony_ci /* 244762306a36Sopenharmony_ci * We cannot add any more memory (alignment, physical limit) 244862306a36Sopenharmony_ci * or we have too many offline memory blocks. 244962306a36Sopenharmony_ci */ 245062306a36Sopenharmony_ci break; 245162306a36Sopenharmony_ci case -ETXTBSY: 245262306a36Sopenharmony_ci /* 245362306a36Sopenharmony_ci * The hypervisor cannot process our request right now 245462306a36Sopenharmony_ci * (e.g., out of memory, migrating); 245562306a36Sopenharmony_ci */ 245662306a36Sopenharmony_ci case -EBUSY: 245762306a36Sopenharmony_ci /* 245862306a36Sopenharmony_ci * We cannot free up any memory to unplug it (all plugged memory 245962306a36Sopenharmony_ci * is busy). 246062306a36Sopenharmony_ci */ 246162306a36Sopenharmony_ci case -ENOMEM: 246262306a36Sopenharmony_ci /* Out of memory, try again later. */ 246362306a36Sopenharmony_ci hrtimer_start(&vm->retry_timer, ms_to_ktime(vm->retry_timer_ms), 246462306a36Sopenharmony_ci HRTIMER_MODE_REL); 246562306a36Sopenharmony_ci break; 246662306a36Sopenharmony_ci case -EAGAIN: 246762306a36Sopenharmony_ci /* Retry immediately (e.g., the config changed). */ 246862306a36Sopenharmony_ci goto retry; 246962306a36Sopenharmony_ci default: 247062306a36Sopenharmony_ci /* Unknown error, mark as broken */ 247162306a36Sopenharmony_ci dev_err(&vm->vdev->dev, 247262306a36Sopenharmony_ci "unknown error, marking device broken: %d\n", rc); 247362306a36Sopenharmony_ci vm->broken = true; 247462306a36Sopenharmony_ci } 247562306a36Sopenharmony_ci 247662306a36Sopenharmony_ci atomic_set(&vm->wq_active, 0); 247762306a36Sopenharmony_ci} 247862306a36Sopenharmony_ci 247962306a36Sopenharmony_cistatic enum hrtimer_restart virtio_mem_timer_expired(struct hrtimer *timer) 248062306a36Sopenharmony_ci{ 248162306a36Sopenharmony_ci struct virtio_mem *vm = container_of(timer, struct virtio_mem, 248262306a36Sopenharmony_ci retry_timer); 248362306a36Sopenharmony_ci 248462306a36Sopenharmony_ci virtio_mem_retry(vm); 248562306a36Sopenharmony_ci vm->retry_timer_ms = min_t(unsigned int, vm->retry_timer_ms * 2, 248662306a36Sopenharmony_ci VIRTIO_MEM_RETRY_TIMER_MAX_MS); 248762306a36Sopenharmony_ci return HRTIMER_NORESTART; 248862306a36Sopenharmony_ci} 248962306a36Sopenharmony_ci 249062306a36Sopenharmony_cistatic void virtio_mem_handle_response(struct virtqueue *vq) 249162306a36Sopenharmony_ci{ 249262306a36Sopenharmony_ci struct virtio_mem *vm = vq->vdev->priv; 249362306a36Sopenharmony_ci 249462306a36Sopenharmony_ci wake_up(&vm->host_resp); 249562306a36Sopenharmony_ci} 249662306a36Sopenharmony_ci 249762306a36Sopenharmony_cistatic int virtio_mem_init_vq(struct virtio_mem *vm) 249862306a36Sopenharmony_ci{ 249962306a36Sopenharmony_ci struct virtqueue *vq; 250062306a36Sopenharmony_ci 250162306a36Sopenharmony_ci vq = virtio_find_single_vq(vm->vdev, virtio_mem_handle_response, 250262306a36Sopenharmony_ci "guest-request"); 250362306a36Sopenharmony_ci if (IS_ERR(vq)) 250462306a36Sopenharmony_ci return PTR_ERR(vq); 250562306a36Sopenharmony_ci vm->vq = vq; 250662306a36Sopenharmony_ci 250762306a36Sopenharmony_ci return 0; 250862306a36Sopenharmony_ci} 250962306a36Sopenharmony_ci 251062306a36Sopenharmony_cistatic int virtio_mem_init_hotplug(struct virtio_mem *vm) 251162306a36Sopenharmony_ci{ 251262306a36Sopenharmony_ci const struct range pluggable_range = mhp_get_pluggable_range(true); 251362306a36Sopenharmony_ci uint64_t unit_pages, sb_size, addr; 251462306a36Sopenharmony_ci int rc; 251562306a36Sopenharmony_ci 251662306a36Sopenharmony_ci /* bad device setup - warn only */ 251762306a36Sopenharmony_ci if (!IS_ALIGNED(vm->addr, memory_block_size_bytes())) 251862306a36Sopenharmony_ci dev_warn(&vm->vdev->dev, 251962306a36Sopenharmony_ci "The alignment of the physical start address can make some memory unusable.\n"); 252062306a36Sopenharmony_ci if (!IS_ALIGNED(vm->addr + vm->region_size, memory_block_size_bytes())) 252162306a36Sopenharmony_ci dev_warn(&vm->vdev->dev, 252262306a36Sopenharmony_ci "The alignment of the physical end address can make some memory unusable.\n"); 252362306a36Sopenharmony_ci if (vm->addr < pluggable_range.start || 252462306a36Sopenharmony_ci vm->addr + vm->region_size - 1 > pluggable_range.end) 252562306a36Sopenharmony_ci dev_warn(&vm->vdev->dev, 252662306a36Sopenharmony_ci "Some device memory is not addressable/pluggable. This can make some memory unusable.\n"); 252762306a36Sopenharmony_ci 252862306a36Sopenharmony_ci /* Prepare the offline threshold - make sure we can add two blocks. */ 252962306a36Sopenharmony_ci vm->offline_threshold = max_t(uint64_t, 2 * memory_block_size_bytes(), 253062306a36Sopenharmony_ci VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD); 253162306a36Sopenharmony_ci 253262306a36Sopenharmony_ci /* 253362306a36Sopenharmony_ci * alloc_contig_range() works reliably with pageblock 253462306a36Sopenharmony_ci * granularity on ZONE_NORMAL, use pageblock_nr_pages. 253562306a36Sopenharmony_ci */ 253662306a36Sopenharmony_ci sb_size = PAGE_SIZE * pageblock_nr_pages; 253762306a36Sopenharmony_ci sb_size = max_t(uint64_t, vm->device_block_size, sb_size); 253862306a36Sopenharmony_ci 253962306a36Sopenharmony_ci if (sb_size < memory_block_size_bytes() && !force_bbm) { 254062306a36Sopenharmony_ci /* SBM: At least two subblocks per Linux memory block. */ 254162306a36Sopenharmony_ci vm->in_sbm = true; 254262306a36Sopenharmony_ci vm->sbm.sb_size = sb_size; 254362306a36Sopenharmony_ci vm->sbm.sbs_per_mb = memory_block_size_bytes() / 254462306a36Sopenharmony_ci vm->sbm.sb_size; 254562306a36Sopenharmony_ci 254662306a36Sopenharmony_ci /* Round up to the next full memory block */ 254762306a36Sopenharmony_ci addr = max_t(uint64_t, vm->addr, pluggable_range.start) + 254862306a36Sopenharmony_ci memory_block_size_bytes() - 1; 254962306a36Sopenharmony_ci vm->sbm.first_mb_id = virtio_mem_phys_to_mb_id(addr); 255062306a36Sopenharmony_ci vm->sbm.next_mb_id = vm->sbm.first_mb_id; 255162306a36Sopenharmony_ci } else { 255262306a36Sopenharmony_ci /* BBM: At least one Linux memory block. */ 255362306a36Sopenharmony_ci vm->bbm.bb_size = max_t(uint64_t, vm->device_block_size, 255462306a36Sopenharmony_ci memory_block_size_bytes()); 255562306a36Sopenharmony_ci 255662306a36Sopenharmony_ci if (bbm_block_size) { 255762306a36Sopenharmony_ci if (!is_power_of_2(bbm_block_size)) { 255862306a36Sopenharmony_ci dev_warn(&vm->vdev->dev, 255962306a36Sopenharmony_ci "bbm_block_size is not a power of 2"); 256062306a36Sopenharmony_ci } else if (bbm_block_size < vm->bbm.bb_size) { 256162306a36Sopenharmony_ci dev_warn(&vm->vdev->dev, 256262306a36Sopenharmony_ci "bbm_block_size is too small"); 256362306a36Sopenharmony_ci } else { 256462306a36Sopenharmony_ci vm->bbm.bb_size = bbm_block_size; 256562306a36Sopenharmony_ci } 256662306a36Sopenharmony_ci } 256762306a36Sopenharmony_ci 256862306a36Sopenharmony_ci /* Round up to the next aligned big block */ 256962306a36Sopenharmony_ci addr = max_t(uint64_t, vm->addr, pluggable_range.start) + 257062306a36Sopenharmony_ci vm->bbm.bb_size - 1; 257162306a36Sopenharmony_ci vm->bbm.first_bb_id = virtio_mem_phys_to_bb_id(vm, addr); 257262306a36Sopenharmony_ci vm->bbm.next_bb_id = vm->bbm.first_bb_id; 257362306a36Sopenharmony_ci 257462306a36Sopenharmony_ci /* Make sure we can add two big blocks. */ 257562306a36Sopenharmony_ci vm->offline_threshold = max_t(uint64_t, 2 * vm->bbm.bb_size, 257662306a36Sopenharmony_ci vm->offline_threshold); 257762306a36Sopenharmony_ci } 257862306a36Sopenharmony_ci 257962306a36Sopenharmony_ci dev_info(&vm->vdev->dev, "memory block size: 0x%lx", 258062306a36Sopenharmony_ci memory_block_size_bytes()); 258162306a36Sopenharmony_ci if (vm->in_sbm) 258262306a36Sopenharmony_ci dev_info(&vm->vdev->dev, "subblock size: 0x%llx", 258362306a36Sopenharmony_ci (unsigned long long)vm->sbm.sb_size); 258462306a36Sopenharmony_ci else 258562306a36Sopenharmony_ci dev_info(&vm->vdev->dev, "big block size: 0x%llx", 258662306a36Sopenharmony_ci (unsigned long long)vm->bbm.bb_size); 258762306a36Sopenharmony_ci 258862306a36Sopenharmony_ci /* create the parent resource for all memory */ 258962306a36Sopenharmony_ci rc = virtio_mem_create_resource(vm); 259062306a36Sopenharmony_ci if (rc) 259162306a36Sopenharmony_ci return rc; 259262306a36Sopenharmony_ci 259362306a36Sopenharmony_ci /* use a single dynamic memory group to cover the whole memory device */ 259462306a36Sopenharmony_ci if (vm->in_sbm) 259562306a36Sopenharmony_ci unit_pages = PHYS_PFN(memory_block_size_bytes()); 259662306a36Sopenharmony_ci else 259762306a36Sopenharmony_ci unit_pages = PHYS_PFN(vm->bbm.bb_size); 259862306a36Sopenharmony_ci rc = memory_group_register_dynamic(vm->nid, unit_pages); 259962306a36Sopenharmony_ci if (rc < 0) 260062306a36Sopenharmony_ci goto out_del_resource; 260162306a36Sopenharmony_ci vm->mgid = rc; 260262306a36Sopenharmony_ci 260362306a36Sopenharmony_ci /* 260462306a36Sopenharmony_ci * If we still have memory plugged, we have to unplug all memory first. 260562306a36Sopenharmony_ci * Registering our parent resource makes sure that this memory isn't 260662306a36Sopenharmony_ci * actually in use (e.g., trying to reload the driver). 260762306a36Sopenharmony_ci */ 260862306a36Sopenharmony_ci if (vm->plugged_size) { 260962306a36Sopenharmony_ci vm->unplug_all_required = true; 261062306a36Sopenharmony_ci dev_info(&vm->vdev->dev, "unplugging all memory is required\n"); 261162306a36Sopenharmony_ci } 261262306a36Sopenharmony_ci 261362306a36Sopenharmony_ci /* register callbacks */ 261462306a36Sopenharmony_ci vm->memory_notifier.notifier_call = virtio_mem_memory_notifier_cb; 261562306a36Sopenharmony_ci rc = register_memory_notifier(&vm->memory_notifier); 261662306a36Sopenharmony_ci if (rc) 261762306a36Sopenharmony_ci goto out_unreg_group; 261862306a36Sopenharmony_ci rc = register_virtio_mem_device(vm); 261962306a36Sopenharmony_ci if (rc) 262062306a36Sopenharmony_ci goto out_unreg_mem; 262162306a36Sopenharmony_ci 262262306a36Sopenharmony_ci return 0; 262362306a36Sopenharmony_ciout_unreg_mem: 262462306a36Sopenharmony_ci unregister_memory_notifier(&vm->memory_notifier); 262562306a36Sopenharmony_ciout_unreg_group: 262662306a36Sopenharmony_ci memory_group_unregister(vm->mgid); 262762306a36Sopenharmony_ciout_del_resource: 262862306a36Sopenharmony_ci virtio_mem_delete_resource(vm); 262962306a36Sopenharmony_ci return rc; 263062306a36Sopenharmony_ci} 263162306a36Sopenharmony_ci 263262306a36Sopenharmony_ci#ifdef CONFIG_PROC_VMCORE 263362306a36Sopenharmony_cistatic int virtio_mem_send_state_request(struct virtio_mem *vm, uint64_t addr, 263462306a36Sopenharmony_ci uint64_t size) 263562306a36Sopenharmony_ci{ 263662306a36Sopenharmony_ci const uint64_t nb_vm_blocks = size / vm->device_block_size; 263762306a36Sopenharmony_ci const struct virtio_mem_req req = { 263862306a36Sopenharmony_ci .type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_STATE), 263962306a36Sopenharmony_ci .u.state.addr = cpu_to_virtio64(vm->vdev, addr), 264062306a36Sopenharmony_ci .u.state.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks), 264162306a36Sopenharmony_ci }; 264262306a36Sopenharmony_ci int rc = -ENOMEM; 264362306a36Sopenharmony_ci 264462306a36Sopenharmony_ci dev_dbg(&vm->vdev->dev, "requesting state: 0x%llx - 0x%llx\n", addr, 264562306a36Sopenharmony_ci addr + size - 1); 264662306a36Sopenharmony_ci 264762306a36Sopenharmony_ci switch (virtio_mem_send_request(vm, &req)) { 264862306a36Sopenharmony_ci case VIRTIO_MEM_RESP_ACK: 264962306a36Sopenharmony_ci return virtio16_to_cpu(vm->vdev, vm->resp.u.state.state); 265062306a36Sopenharmony_ci case VIRTIO_MEM_RESP_ERROR: 265162306a36Sopenharmony_ci rc = -EINVAL; 265262306a36Sopenharmony_ci break; 265362306a36Sopenharmony_ci default: 265462306a36Sopenharmony_ci break; 265562306a36Sopenharmony_ci } 265662306a36Sopenharmony_ci 265762306a36Sopenharmony_ci dev_dbg(&vm->vdev->dev, "requesting state failed: %d\n", rc); 265862306a36Sopenharmony_ci return rc; 265962306a36Sopenharmony_ci} 266062306a36Sopenharmony_ci 266162306a36Sopenharmony_cistatic bool virtio_mem_vmcore_pfn_is_ram(struct vmcore_cb *cb, 266262306a36Sopenharmony_ci unsigned long pfn) 266362306a36Sopenharmony_ci{ 266462306a36Sopenharmony_ci struct virtio_mem *vm = container_of(cb, struct virtio_mem, 266562306a36Sopenharmony_ci vmcore_cb); 266662306a36Sopenharmony_ci uint64_t addr = PFN_PHYS(pfn); 266762306a36Sopenharmony_ci bool is_ram; 266862306a36Sopenharmony_ci int rc; 266962306a36Sopenharmony_ci 267062306a36Sopenharmony_ci if (!virtio_mem_contains_range(vm, addr, PAGE_SIZE)) 267162306a36Sopenharmony_ci return true; 267262306a36Sopenharmony_ci if (!vm->plugged_size) 267362306a36Sopenharmony_ci return false; 267462306a36Sopenharmony_ci 267562306a36Sopenharmony_ci /* 267662306a36Sopenharmony_ci * We have to serialize device requests and access to the information 267762306a36Sopenharmony_ci * about the block queried last. 267862306a36Sopenharmony_ci */ 267962306a36Sopenharmony_ci mutex_lock(&vm->hotplug_mutex); 268062306a36Sopenharmony_ci 268162306a36Sopenharmony_ci addr = ALIGN_DOWN(addr, vm->device_block_size); 268262306a36Sopenharmony_ci if (addr != vm->last_block_addr) { 268362306a36Sopenharmony_ci rc = virtio_mem_send_state_request(vm, addr, 268462306a36Sopenharmony_ci vm->device_block_size); 268562306a36Sopenharmony_ci /* On any kind of error, we're going to signal !ram. */ 268662306a36Sopenharmony_ci if (rc == VIRTIO_MEM_STATE_PLUGGED) 268762306a36Sopenharmony_ci vm->last_block_plugged = true; 268862306a36Sopenharmony_ci else 268962306a36Sopenharmony_ci vm->last_block_plugged = false; 269062306a36Sopenharmony_ci vm->last_block_addr = addr; 269162306a36Sopenharmony_ci } 269262306a36Sopenharmony_ci 269362306a36Sopenharmony_ci is_ram = vm->last_block_plugged; 269462306a36Sopenharmony_ci mutex_unlock(&vm->hotplug_mutex); 269562306a36Sopenharmony_ci return is_ram; 269662306a36Sopenharmony_ci} 269762306a36Sopenharmony_ci#endif /* CONFIG_PROC_VMCORE */ 269862306a36Sopenharmony_ci 269962306a36Sopenharmony_cistatic int virtio_mem_init_kdump(struct virtio_mem *vm) 270062306a36Sopenharmony_ci{ 270162306a36Sopenharmony_ci#ifdef CONFIG_PROC_VMCORE 270262306a36Sopenharmony_ci dev_info(&vm->vdev->dev, "memory hot(un)plug disabled in kdump kernel\n"); 270362306a36Sopenharmony_ci vm->vmcore_cb.pfn_is_ram = virtio_mem_vmcore_pfn_is_ram; 270462306a36Sopenharmony_ci register_vmcore_cb(&vm->vmcore_cb); 270562306a36Sopenharmony_ci return 0; 270662306a36Sopenharmony_ci#else /* CONFIG_PROC_VMCORE */ 270762306a36Sopenharmony_ci dev_warn(&vm->vdev->dev, "disabled in kdump kernel without vmcore\n"); 270862306a36Sopenharmony_ci return -EBUSY; 270962306a36Sopenharmony_ci#endif /* CONFIG_PROC_VMCORE */ 271062306a36Sopenharmony_ci} 271162306a36Sopenharmony_ci 271262306a36Sopenharmony_cistatic int virtio_mem_init(struct virtio_mem *vm) 271362306a36Sopenharmony_ci{ 271462306a36Sopenharmony_ci uint16_t node_id; 271562306a36Sopenharmony_ci 271662306a36Sopenharmony_ci if (!vm->vdev->config->get) { 271762306a36Sopenharmony_ci dev_err(&vm->vdev->dev, "config access disabled\n"); 271862306a36Sopenharmony_ci return -EINVAL; 271962306a36Sopenharmony_ci } 272062306a36Sopenharmony_ci 272162306a36Sopenharmony_ci /* Fetch all properties that can't change. */ 272262306a36Sopenharmony_ci virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size, 272362306a36Sopenharmony_ci &vm->plugged_size); 272462306a36Sopenharmony_ci virtio_cread_le(vm->vdev, struct virtio_mem_config, block_size, 272562306a36Sopenharmony_ci &vm->device_block_size); 272662306a36Sopenharmony_ci virtio_cread_le(vm->vdev, struct virtio_mem_config, node_id, 272762306a36Sopenharmony_ci &node_id); 272862306a36Sopenharmony_ci vm->nid = virtio_mem_translate_node_id(vm, node_id); 272962306a36Sopenharmony_ci virtio_cread_le(vm->vdev, struct virtio_mem_config, addr, &vm->addr); 273062306a36Sopenharmony_ci virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size, 273162306a36Sopenharmony_ci &vm->region_size); 273262306a36Sopenharmony_ci 273362306a36Sopenharmony_ci /* Determine the nid for the device based on the lowest address. */ 273462306a36Sopenharmony_ci if (vm->nid == NUMA_NO_NODE) 273562306a36Sopenharmony_ci vm->nid = memory_add_physaddr_to_nid(vm->addr); 273662306a36Sopenharmony_ci 273762306a36Sopenharmony_ci dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr); 273862306a36Sopenharmony_ci dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size); 273962306a36Sopenharmony_ci dev_info(&vm->vdev->dev, "device block size: 0x%llx", 274062306a36Sopenharmony_ci (unsigned long long)vm->device_block_size); 274162306a36Sopenharmony_ci if (vm->nid != NUMA_NO_NODE && IS_ENABLED(CONFIG_NUMA)) 274262306a36Sopenharmony_ci dev_info(&vm->vdev->dev, "nid: %d", vm->nid); 274362306a36Sopenharmony_ci 274462306a36Sopenharmony_ci /* 274562306a36Sopenharmony_ci * We don't want to (un)plug or reuse any memory when in kdump. The 274662306a36Sopenharmony_ci * memory is still accessible (but not exposed to Linux). 274762306a36Sopenharmony_ci */ 274862306a36Sopenharmony_ci if (vm->in_kdump) 274962306a36Sopenharmony_ci return virtio_mem_init_kdump(vm); 275062306a36Sopenharmony_ci return virtio_mem_init_hotplug(vm); 275162306a36Sopenharmony_ci} 275262306a36Sopenharmony_ci 275362306a36Sopenharmony_cistatic int virtio_mem_create_resource(struct virtio_mem *vm) 275462306a36Sopenharmony_ci{ 275562306a36Sopenharmony_ci /* 275662306a36Sopenharmony_ci * When force-unloading the driver and removing the device, we 275762306a36Sopenharmony_ci * could have a garbage pointer. Duplicate the string. 275862306a36Sopenharmony_ci */ 275962306a36Sopenharmony_ci const char *name = kstrdup(dev_name(&vm->vdev->dev), GFP_KERNEL); 276062306a36Sopenharmony_ci 276162306a36Sopenharmony_ci if (!name) 276262306a36Sopenharmony_ci return -ENOMEM; 276362306a36Sopenharmony_ci 276462306a36Sopenharmony_ci /* Disallow mapping device memory via /dev/mem completely. */ 276562306a36Sopenharmony_ci vm->parent_resource = __request_mem_region(vm->addr, vm->region_size, 276662306a36Sopenharmony_ci name, IORESOURCE_SYSTEM_RAM | 276762306a36Sopenharmony_ci IORESOURCE_EXCLUSIVE); 276862306a36Sopenharmony_ci if (!vm->parent_resource) { 276962306a36Sopenharmony_ci kfree(name); 277062306a36Sopenharmony_ci dev_warn(&vm->vdev->dev, "could not reserve device region\n"); 277162306a36Sopenharmony_ci dev_info(&vm->vdev->dev, 277262306a36Sopenharmony_ci "reloading the driver is not supported\n"); 277362306a36Sopenharmony_ci return -EBUSY; 277462306a36Sopenharmony_ci } 277562306a36Sopenharmony_ci 277662306a36Sopenharmony_ci /* The memory is not actually busy - make add_memory() work. */ 277762306a36Sopenharmony_ci vm->parent_resource->flags &= ~IORESOURCE_BUSY; 277862306a36Sopenharmony_ci return 0; 277962306a36Sopenharmony_ci} 278062306a36Sopenharmony_ci 278162306a36Sopenharmony_cistatic void virtio_mem_delete_resource(struct virtio_mem *vm) 278262306a36Sopenharmony_ci{ 278362306a36Sopenharmony_ci const char *name; 278462306a36Sopenharmony_ci 278562306a36Sopenharmony_ci if (!vm->parent_resource) 278662306a36Sopenharmony_ci return; 278762306a36Sopenharmony_ci 278862306a36Sopenharmony_ci name = vm->parent_resource->name; 278962306a36Sopenharmony_ci release_resource(vm->parent_resource); 279062306a36Sopenharmony_ci kfree(vm->parent_resource); 279162306a36Sopenharmony_ci kfree(name); 279262306a36Sopenharmony_ci vm->parent_resource = NULL; 279362306a36Sopenharmony_ci} 279462306a36Sopenharmony_ci 279562306a36Sopenharmony_cistatic int virtio_mem_range_has_system_ram(struct resource *res, void *arg) 279662306a36Sopenharmony_ci{ 279762306a36Sopenharmony_ci return 1; 279862306a36Sopenharmony_ci} 279962306a36Sopenharmony_ci 280062306a36Sopenharmony_cistatic bool virtio_mem_has_memory_added(struct virtio_mem *vm) 280162306a36Sopenharmony_ci{ 280262306a36Sopenharmony_ci const unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; 280362306a36Sopenharmony_ci 280462306a36Sopenharmony_ci return walk_iomem_res_desc(IORES_DESC_NONE, flags, vm->addr, 280562306a36Sopenharmony_ci vm->addr + vm->region_size, NULL, 280662306a36Sopenharmony_ci virtio_mem_range_has_system_ram) == 1; 280762306a36Sopenharmony_ci} 280862306a36Sopenharmony_ci 280962306a36Sopenharmony_cistatic int virtio_mem_probe(struct virtio_device *vdev) 281062306a36Sopenharmony_ci{ 281162306a36Sopenharmony_ci struct virtio_mem *vm; 281262306a36Sopenharmony_ci int rc; 281362306a36Sopenharmony_ci 281462306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(struct virtio_mem_req) != 24); 281562306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(struct virtio_mem_resp) != 10); 281662306a36Sopenharmony_ci 281762306a36Sopenharmony_ci vdev->priv = vm = kzalloc(sizeof(*vm), GFP_KERNEL); 281862306a36Sopenharmony_ci if (!vm) 281962306a36Sopenharmony_ci return -ENOMEM; 282062306a36Sopenharmony_ci 282162306a36Sopenharmony_ci init_waitqueue_head(&vm->host_resp); 282262306a36Sopenharmony_ci vm->vdev = vdev; 282362306a36Sopenharmony_ci INIT_WORK(&vm->wq, virtio_mem_run_wq); 282462306a36Sopenharmony_ci mutex_init(&vm->hotplug_mutex); 282562306a36Sopenharmony_ci INIT_LIST_HEAD(&vm->next); 282662306a36Sopenharmony_ci spin_lock_init(&vm->removal_lock); 282762306a36Sopenharmony_ci hrtimer_init(&vm->retry_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 282862306a36Sopenharmony_ci vm->retry_timer.function = virtio_mem_timer_expired; 282962306a36Sopenharmony_ci vm->retry_timer_ms = VIRTIO_MEM_RETRY_TIMER_MIN_MS; 283062306a36Sopenharmony_ci vm->in_kdump = is_kdump_kernel(); 283162306a36Sopenharmony_ci 283262306a36Sopenharmony_ci /* register the virtqueue */ 283362306a36Sopenharmony_ci rc = virtio_mem_init_vq(vm); 283462306a36Sopenharmony_ci if (rc) 283562306a36Sopenharmony_ci goto out_free_vm; 283662306a36Sopenharmony_ci 283762306a36Sopenharmony_ci /* initialize the device by querying the config */ 283862306a36Sopenharmony_ci rc = virtio_mem_init(vm); 283962306a36Sopenharmony_ci if (rc) 284062306a36Sopenharmony_ci goto out_del_vq; 284162306a36Sopenharmony_ci 284262306a36Sopenharmony_ci virtio_device_ready(vdev); 284362306a36Sopenharmony_ci 284462306a36Sopenharmony_ci /* trigger a config update to start processing the requested_size */ 284562306a36Sopenharmony_ci if (!vm->in_kdump) { 284662306a36Sopenharmony_ci atomic_set(&vm->config_changed, 1); 284762306a36Sopenharmony_ci queue_work(system_freezable_wq, &vm->wq); 284862306a36Sopenharmony_ci } 284962306a36Sopenharmony_ci 285062306a36Sopenharmony_ci return 0; 285162306a36Sopenharmony_ciout_del_vq: 285262306a36Sopenharmony_ci vdev->config->del_vqs(vdev); 285362306a36Sopenharmony_ciout_free_vm: 285462306a36Sopenharmony_ci kfree(vm); 285562306a36Sopenharmony_ci vdev->priv = NULL; 285662306a36Sopenharmony_ci 285762306a36Sopenharmony_ci return rc; 285862306a36Sopenharmony_ci} 285962306a36Sopenharmony_ci 286062306a36Sopenharmony_cistatic void virtio_mem_deinit_hotplug(struct virtio_mem *vm) 286162306a36Sopenharmony_ci{ 286262306a36Sopenharmony_ci unsigned long mb_id; 286362306a36Sopenharmony_ci int rc; 286462306a36Sopenharmony_ci 286562306a36Sopenharmony_ci /* 286662306a36Sopenharmony_ci * Make sure the workqueue won't be triggered anymore and no memory 286762306a36Sopenharmony_ci * blocks can be onlined/offlined until we're finished here. 286862306a36Sopenharmony_ci */ 286962306a36Sopenharmony_ci mutex_lock(&vm->hotplug_mutex); 287062306a36Sopenharmony_ci spin_lock_irq(&vm->removal_lock); 287162306a36Sopenharmony_ci vm->removing = true; 287262306a36Sopenharmony_ci spin_unlock_irq(&vm->removal_lock); 287362306a36Sopenharmony_ci mutex_unlock(&vm->hotplug_mutex); 287462306a36Sopenharmony_ci 287562306a36Sopenharmony_ci /* wait until the workqueue stopped */ 287662306a36Sopenharmony_ci cancel_work_sync(&vm->wq); 287762306a36Sopenharmony_ci hrtimer_cancel(&vm->retry_timer); 287862306a36Sopenharmony_ci 287962306a36Sopenharmony_ci if (vm->in_sbm) { 288062306a36Sopenharmony_ci /* 288162306a36Sopenharmony_ci * After we unregistered our callbacks, user space can online 288262306a36Sopenharmony_ci * partially plugged offline blocks. Make sure to remove them. 288362306a36Sopenharmony_ci */ 288462306a36Sopenharmony_ci virtio_mem_sbm_for_each_mb(vm, mb_id, 288562306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) { 288662306a36Sopenharmony_ci rc = virtio_mem_sbm_remove_mb(vm, mb_id); 288762306a36Sopenharmony_ci BUG_ON(rc); 288862306a36Sopenharmony_ci virtio_mem_sbm_set_mb_state(vm, mb_id, 288962306a36Sopenharmony_ci VIRTIO_MEM_SBM_MB_UNUSED); 289062306a36Sopenharmony_ci } 289162306a36Sopenharmony_ci /* 289262306a36Sopenharmony_ci * After we unregistered our callbacks, user space can no longer 289362306a36Sopenharmony_ci * offline partially plugged online memory blocks. No need to 289462306a36Sopenharmony_ci * worry about them. 289562306a36Sopenharmony_ci */ 289662306a36Sopenharmony_ci } 289762306a36Sopenharmony_ci 289862306a36Sopenharmony_ci /* unregister callbacks */ 289962306a36Sopenharmony_ci unregister_virtio_mem_device(vm); 290062306a36Sopenharmony_ci unregister_memory_notifier(&vm->memory_notifier); 290162306a36Sopenharmony_ci 290262306a36Sopenharmony_ci /* 290362306a36Sopenharmony_ci * There is no way we could reliably remove all memory we have added to 290462306a36Sopenharmony_ci * the system. And there is no way to stop the driver/device from going 290562306a36Sopenharmony_ci * away. Warn at least. 290662306a36Sopenharmony_ci */ 290762306a36Sopenharmony_ci if (virtio_mem_has_memory_added(vm)) { 290862306a36Sopenharmony_ci dev_warn(&vm->vdev->dev, 290962306a36Sopenharmony_ci "device still has system memory added\n"); 291062306a36Sopenharmony_ci } else { 291162306a36Sopenharmony_ci virtio_mem_delete_resource(vm); 291262306a36Sopenharmony_ci kfree_const(vm->resource_name); 291362306a36Sopenharmony_ci memory_group_unregister(vm->mgid); 291462306a36Sopenharmony_ci } 291562306a36Sopenharmony_ci 291662306a36Sopenharmony_ci /* remove all tracking data - no locking needed */ 291762306a36Sopenharmony_ci if (vm->in_sbm) { 291862306a36Sopenharmony_ci vfree(vm->sbm.mb_states); 291962306a36Sopenharmony_ci vfree(vm->sbm.sb_states); 292062306a36Sopenharmony_ci } else { 292162306a36Sopenharmony_ci vfree(vm->bbm.bb_states); 292262306a36Sopenharmony_ci } 292362306a36Sopenharmony_ci} 292462306a36Sopenharmony_ci 292562306a36Sopenharmony_cistatic void virtio_mem_deinit_kdump(struct virtio_mem *vm) 292662306a36Sopenharmony_ci{ 292762306a36Sopenharmony_ci#ifdef CONFIG_PROC_VMCORE 292862306a36Sopenharmony_ci unregister_vmcore_cb(&vm->vmcore_cb); 292962306a36Sopenharmony_ci#endif /* CONFIG_PROC_VMCORE */ 293062306a36Sopenharmony_ci} 293162306a36Sopenharmony_ci 293262306a36Sopenharmony_cistatic void virtio_mem_remove(struct virtio_device *vdev) 293362306a36Sopenharmony_ci{ 293462306a36Sopenharmony_ci struct virtio_mem *vm = vdev->priv; 293562306a36Sopenharmony_ci 293662306a36Sopenharmony_ci if (vm->in_kdump) 293762306a36Sopenharmony_ci virtio_mem_deinit_kdump(vm); 293862306a36Sopenharmony_ci else 293962306a36Sopenharmony_ci virtio_mem_deinit_hotplug(vm); 294062306a36Sopenharmony_ci 294162306a36Sopenharmony_ci /* reset the device and cleanup the queues */ 294262306a36Sopenharmony_ci virtio_reset_device(vdev); 294362306a36Sopenharmony_ci vdev->config->del_vqs(vdev); 294462306a36Sopenharmony_ci 294562306a36Sopenharmony_ci kfree(vm); 294662306a36Sopenharmony_ci vdev->priv = NULL; 294762306a36Sopenharmony_ci} 294862306a36Sopenharmony_ci 294962306a36Sopenharmony_cistatic void virtio_mem_config_changed(struct virtio_device *vdev) 295062306a36Sopenharmony_ci{ 295162306a36Sopenharmony_ci struct virtio_mem *vm = vdev->priv; 295262306a36Sopenharmony_ci 295362306a36Sopenharmony_ci if (unlikely(vm->in_kdump)) 295462306a36Sopenharmony_ci return; 295562306a36Sopenharmony_ci 295662306a36Sopenharmony_ci atomic_set(&vm->config_changed, 1); 295762306a36Sopenharmony_ci virtio_mem_retry(vm); 295862306a36Sopenharmony_ci} 295962306a36Sopenharmony_ci 296062306a36Sopenharmony_ci#ifdef CONFIG_PM_SLEEP 296162306a36Sopenharmony_cistatic int virtio_mem_freeze(struct virtio_device *vdev) 296262306a36Sopenharmony_ci{ 296362306a36Sopenharmony_ci /* 296462306a36Sopenharmony_ci * When restarting the VM, all memory is usually unplugged. Don't 296562306a36Sopenharmony_ci * allow to suspend/hibernate. 296662306a36Sopenharmony_ci */ 296762306a36Sopenharmony_ci dev_err(&vdev->dev, "save/restore not supported.\n"); 296862306a36Sopenharmony_ci return -EPERM; 296962306a36Sopenharmony_ci} 297062306a36Sopenharmony_ci 297162306a36Sopenharmony_cistatic int virtio_mem_restore(struct virtio_device *vdev) 297262306a36Sopenharmony_ci{ 297362306a36Sopenharmony_ci return -EPERM; 297462306a36Sopenharmony_ci} 297562306a36Sopenharmony_ci#endif 297662306a36Sopenharmony_ci 297762306a36Sopenharmony_cistatic unsigned int virtio_mem_features[] = { 297862306a36Sopenharmony_ci#if defined(CONFIG_NUMA) && defined(CONFIG_ACPI_NUMA) 297962306a36Sopenharmony_ci VIRTIO_MEM_F_ACPI_PXM, 298062306a36Sopenharmony_ci#endif 298162306a36Sopenharmony_ci VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, 298262306a36Sopenharmony_ci}; 298362306a36Sopenharmony_ci 298462306a36Sopenharmony_cistatic const struct virtio_device_id virtio_mem_id_table[] = { 298562306a36Sopenharmony_ci { VIRTIO_ID_MEM, VIRTIO_DEV_ANY_ID }, 298662306a36Sopenharmony_ci { 0 }, 298762306a36Sopenharmony_ci}; 298862306a36Sopenharmony_ci 298962306a36Sopenharmony_cistatic struct virtio_driver virtio_mem_driver = { 299062306a36Sopenharmony_ci .feature_table = virtio_mem_features, 299162306a36Sopenharmony_ci .feature_table_size = ARRAY_SIZE(virtio_mem_features), 299262306a36Sopenharmony_ci .driver.name = KBUILD_MODNAME, 299362306a36Sopenharmony_ci .driver.owner = THIS_MODULE, 299462306a36Sopenharmony_ci .id_table = virtio_mem_id_table, 299562306a36Sopenharmony_ci .probe = virtio_mem_probe, 299662306a36Sopenharmony_ci .remove = virtio_mem_remove, 299762306a36Sopenharmony_ci .config_changed = virtio_mem_config_changed, 299862306a36Sopenharmony_ci#ifdef CONFIG_PM_SLEEP 299962306a36Sopenharmony_ci .freeze = virtio_mem_freeze, 300062306a36Sopenharmony_ci .restore = virtio_mem_restore, 300162306a36Sopenharmony_ci#endif 300262306a36Sopenharmony_ci}; 300362306a36Sopenharmony_ci 300462306a36Sopenharmony_cimodule_virtio_driver(virtio_mem_driver); 300562306a36Sopenharmony_ciMODULE_DEVICE_TABLE(virtio, virtio_mem_id_table); 300662306a36Sopenharmony_ciMODULE_AUTHOR("David Hildenbrand <david@redhat.com>"); 300762306a36Sopenharmony_ciMODULE_DESCRIPTION("Virtio-mem driver"); 300862306a36Sopenharmony_ciMODULE_LICENSE("GPL"); 3009