162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (c) 2012, Microsoft Corporation. 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Author: 662306a36Sopenharmony_ci * K. Y. Srinivasan <kys@microsoft.com> 762306a36Sopenharmony_ci */ 862306a36Sopenharmony_ci 962306a36Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 1062306a36Sopenharmony_ci 1162306a36Sopenharmony_ci#include <linux/cleanup.h> 1262306a36Sopenharmony_ci#include <linux/kernel.h> 1362306a36Sopenharmony_ci#include <linux/jiffies.h> 1462306a36Sopenharmony_ci#include <linux/mman.h> 1562306a36Sopenharmony_ci#include <linux/debugfs.h> 1662306a36Sopenharmony_ci#include <linux/delay.h> 1762306a36Sopenharmony_ci#include <linux/init.h> 1862306a36Sopenharmony_ci#include <linux/module.h> 1962306a36Sopenharmony_ci#include <linux/slab.h> 2062306a36Sopenharmony_ci#include <linux/kthread.h> 2162306a36Sopenharmony_ci#include <linux/completion.h> 2262306a36Sopenharmony_ci#include <linux/count_zeros.h> 2362306a36Sopenharmony_ci#include <linux/memory_hotplug.h> 2462306a36Sopenharmony_ci#include <linux/memory.h> 2562306a36Sopenharmony_ci#include <linux/notifier.h> 2662306a36Sopenharmony_ci#include <linux/percpu_counter.h> 2762306a36Sopenharmony_ci#include <linux/page_reporting.h> 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_ci#include <linux/hyperv.h> 3062306a36Sopenharmony_ci#include <asm/hyperv-tlfs.h> 3162306a36Sopenharmony_ci 3262306a36Sopenharmony_ci#include <asm/mshyperv.h> 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci#define CREATE_TRACE_POINTS 3562306a36Sopenharmony_ci#include "hv_trace_balloon.h" 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_ci/* 3862306a36Sopenharmony_ci * We begin with definitions supporting the Dynamic Memory protocol 3962306a36Sopenharmony_ci * with the host. 4062306a36Sopenharmony_ci * 4162306a36Sopenharmony_ci * Begin protocol definitions. 4262306a36Sopenharmony_ci */ 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ci/* 4762306a36Sopenharmony_ci * Protocol versions. The low word is the minor version, the high word the major 4862306a36Sopenharmony_ci * version. 4962306a36Sopenharmony_ci * 5062306a36Sopenharmony_ci * History: 5162306a36Sopenharmony_ci * Initial version 1.0 5262306a36Sopenharmony_ci * Changed to 0.1 on 2009/03/25 5362306a36Sopenharmony_ci * Changes to 0.2 on 2009/05/14 5462306a36Sopenharmony_ci * Changes to 0.3 on 2009/12/03 5562306a36Sopenharmony_ci * Changed to 1.0 on 2011/04/05 5662306a36Sopenharmony_ci */ 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_ci#define DYNMEM_MAKE_VERSION(Major, Minor) ((__u32)(((Major) << 16) | (Minor))) 5962306a36Sopenharmony_ci#define DYNMEM_MAJOR_VERSION(Version) ((__u32)(Version) >> 16) 6062306a36Sopenharmony_ci#define DYNMEM_MINOR_VERSION(Version) ((__u32)(Version) & 0xff) 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_cienum { 6362306a36Sopenharmony_ci DYNMEM_PROTOCOL_VERSION_1 = DYNMEM_MAKE_VERSION(0, 3), 6462306a36Sopenharmony_ci DYNMEM_PROTOCOL_VERSION_2 = DYNMEM_MAKE_VERSION(1, 0), 6562306a36Sopenharmony_ci DYNMEM_PROTOCOL_VERSION_3 = DYNMEM_MAKE_VERSION(2, 0), 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_ci DYNMEM_PROTOCOL_VERSION_WIN7 = DYNMEM_PROTOCOL_VERSION_1, 6862306a36Sopenharmony_ci DYNMEM_PROTOCOL_VERSION_WIN8 = DYNMEM_PROTOCOL_VERSION_2, 6962306a36Sopenharmony_ci DYNMEM_PROTOCOL_VERSION_WIN10 = DYNMEM_PROTOCOL_VERSION_3, 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci DYNMEM_PROTOCOL_VERSION_CURRENT = DYNMEM_PROTOCOL_VERSION_WIN10 7262306a36Sopenharmony_ci}; 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_ci/* 7762306a36Sopenharmony_ci * Message Types 7862306a36Sopenharmony_ci */ 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_cienum dm_message_type { 8162306a36Sopenharmony_ci /* 8262306a36Sopenharmony_ci * Version 0.3 8362306a36Sopenharmony_ci */ 8462306a36Sopenharmony_ci DM_ERROR = 0, 8562306a36Sopenharmony_ci DM_VERSION_REQUEST = 1, 8662306a36Sopenharmony_ci DM_VERSION_RESPONSE = 2, 8762306a36Sopenharmony_ci DM_CAPABILITIES_REPORT = 3, 8862306a36Sopenharmony_ci DM_CAPABILITIES_RESPONSE = 4, 8962306a36Sopenharmony_ci DM_STATUS_REPORT = 5, 9062306a36Sopenharmony_ci DM_BALLOON_REQUEST = 6, 9162306a36Sopenharmony_ci DM_BALLOON_RESPONSE = 7, 9262306a36Sopenharmony_ci DM_UNBALLOON_REQUEST = 8, 9362306a36Sopenharmony_ci DM_UNBALLOON_RESPONSE = 9, 9462306a36Sopenharmony_ci DM_MEM_HOT_ADD_REQUEST = 10, 9562306a36Sopenharmony_ci DM_MEM_HOT_ADD_RESPONSE = 11, 9662306a36Sopenharmony_ci DM_VERSION_03_MAX = 11, 9762306a36Sopenharmony_ci /* 9862306a36Sopenharmony_ci * Version 1.0. 9962306a36Sopenharmony_ci */ 10062306a36Sopenharmony_ci DM_INFO_MESSAGE = 12, 10162306a36Sopenharmony_ci DM_VERSION_1_MAX = 12 10262306a36Sopenharmony_ci}; 10362306a36Sopenharmony_ci 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_ci/* 10662306a36Sopenharmony_ci * Structures defining the dynamic memory management 10762306a36Sopenharmony_ci * protocol. 10862306a36Sopenharmony_ci */ 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_ciunion dm_version { 11162306a36Sopenharmony_ci struct { 11262306a36Sopenharmony_ci __u16 minor_version; 11362306a36Sopenharmony_ci __u16 major_version; 11462306a36Sopenharmony_ci }; 11562306a36Sopenharmony_ci __u32 version; 11662306a36Sopenharmony_ci} __packed; 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_ci 11962306a36Sopenharmony_ciunion dm_caps { 12062306a36Sopenharmony_ci struct { 12162306a36Sopenharmony_ci __u64 balloon:1; 12262306a36Sopenharmony_ci __u64 hot_add:1; 12362306a36Sopenharmony_ci /* 12462306a36Sopenharmony_ci * To support guests that may have alignment 12562306a36Sopenharmony_ci * limitations on hot-add, the guest can specify 12662306a36Sopenharmony_ci * its alignment requirements; a value of n 12762306a36Sopenharmony_ci * represents an alignment of 2^n in mega bytes. 12862306a36Sopenharmony_ci */ 12962306a36Sopenharmony_ci __u64 hot_add_alignment:4; 13062306a36Sopenharmony_ci __u64 reservedz:58; 13162306a36Sopenharmony_ci } cap_bits; 13262306a36Sopenharmony_ci __u64 caps; 13362306a36Sopenharmony_ci} __packed; 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_ciunion dm_mem_page_range { 13662306a36Sopenharmony_ci struct { 13762306a36Sopenharmony_ci /* 13862306a36Sopenharmony_ci * The PFN number of the first page in the range. 13962306a36Sopenharmony_ci * 40 bits is the architectural limit of a PFN 14062306a36Sopenharmony_ci * number for AMD64. 14162306a36Sopenharmony_ci */ 14262306a36Sopenharmony_ci __u64 start_page:40; 14362306a36Sopenharmony_ci /* 14462306a36Sopenharmony_ci * The number of pages in the range. 14562306a36Sopenharmony_ci */ 14662306a36Sopenharmony_ci __u64 page_cnt:24; 14762306a36Sopenharmony_ci } finfo; 14862306a36Sopenharmony_ci __u64 page_range; 14962306a36Sopenharmony_ci} __packed; 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci 15262306a36Sopenharmony_ci 15362306a36Sopenharmony_ci/* 15462306a36Sopenharmony_ci * The header for all dynamic memory messages: 15562306a36Sopenharmony_ci * 15662306a36Sopenharmony_ci * type: Type of the message. 15762306a36Sopenharmony_ci * size: Size of the message in bytes; including the header. 15862306a36Sopenharmony_ci * trans_id: The guest is responsible for manufacturing this ID. 15962306a36Sopenharmony_ci */ 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_cistruct dm_header { 16262306a36Sopenharmony_ci __u16 type; 16362306a36Sopenharmony_ci __u16 size; 16462306a36Sopenharmony_ci __u32 trans_id; 16562306a36Sopenharmony_ci} __packed; 16662306a36Sopenharmony_ci 16762306a36Sopenharmony_ci/* 16862306a36Sopenharmony_ci * A generic message format for dynamic memory. 16962306a36Sopenharmony_ci * Specific message formats are defined later in the file. 17062306a36Sopenharmony_ci */ 17162306a36Sopenharmony_ci 17262306a36Sopenharmony_cistruct dm_message { 17362306a36Sopenharmony_ci struct dm_header hdr; 17462306a36Sopenharmony_ci __u8 data[]; /* enclosed message */ 17562306a36Sopenharmony_ci} __packed; 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci/* 17962306a36Sopenharmony_ci * Specific message types supporting the dynamic memory protocol. 18062306a36Sopenharmony_ci */ 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_ci/* 18362306a36Sopenharmony_ci * Version negotiation message. Sent from the guest to the host. 18462306a36Sopenharmony_ci * The guest is free to try different versions until the host 18562306a36Sopenharmony_ci * accepts the version. 18662306a36Sopenharmony_ci * 18762306a36Sopenharmony_ci * dm_version: The protocol version requested. 18862306a36Sopenharmony_ci * is_last_attempt: If TRUE, this is the last version guest will request. 18962306a36Sopenharmony_ci * reservedz: Reserved field, set to zero. 19062306a36Sopenharmony_ci */ 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_cistruct dm_version_request { 19362306a36Sopenharmony_ci struct dm_header hdr; 19462306a36Sopenharmony_ci union dm_version version; 19562306a36Sopenharmony_ci __u32 is_last_attempt:1; 19662306a36Sopenharmony_ci __u32 reservedz:31; 19762306a36Sopenharmony_ci} __packed; 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci/* 20062306a36Sopenharmony_ci * Version response message; Host to Guest and indicates 20162306a36Sopenharmony_ci * if the host has accepted the version sent by the guest. 20262306a36Sopenharmony_ci * 20362306a36Sopenharmony_ci * is_accepted: If TRUE, host has accepted the version and the guest 20462306a36Sopenharmony_ci * should proceed to the next stage of the protocol. FALSE indicates that 20562306a36Sopenharmony_ci * guest should re-try with a different version. 20662306a36Sopenharmony_ci * 20762306a36Sopenharmony_ci * reservedz: Reserved field, set to zero. 20862306a36Sopenharmony_ci */ 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_cistruct dm_version_response { 21162306a36Sopenharmony_ci struct dm_header hdr; 21262306a36Sopenharmony_ci __u64 is_accepted:1; 21362306a36Sopenharmony_ci __u64 reservedz:63; 21462306a36Sopenharmony_ci} __packed; 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_ci/* 21762306a36Sopenharmony_ci * Message reporting capabilities. This is sent from the guest to the 21862306a36Sopenharmony_ci * host. 21962306a36Sopenharmony_ci */ 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_cistruct dm_capabilities { 22262306a36Sopenharmony_ci struct dm_header hdr; 22362306a36Sopenharmony_ci union dm_caps caps; 22462306a36Sopenharmony_ci __u64 min_page_cnt; 22562306a36Sopenharmony_ci __u64 max_page_number; 22662306a36Sopenharmony_ci} __packed; 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci/* 22962306a36Sopenharmony_ci * Response to the capabilities message. This is sent from the host to the 23062306a36Sopenharmony_ci * guest. This message notifies if the host has accepted the guest's 23162306a36Sopenharmony_ci * capabilities. If the host has not accepted, the guest must shutdown 23262306a36Sopenharmony_ci * the service. 23362306a36Sopenharmony_ci * 23462306a36Sopenharmony_ci * is_accepted: Indicates if the host has accepted guest's capabilities. 23562306a36Sopenharmony_ci * reservedz: Must be 0. 23662306a36Sopenharmony_ci */ 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_cistruct dm_capabilities_resp_msg { 23962306a36Sopenharmony_ci struct dm_header hdr; 24062306a36Sopenharmony_ci __u64 is_accepted:1; 24162306a36Sopenharmony_ci __u64 reservedz:63; 24262306a36Sopenharmony_ci} __packed; 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_ci/* 24562306a36Sopenharmony_ci * This message is used to report memory pressure from the guest. 24662306a36Sopenharmony_ci * This message is not part of any transaction and there is no 24762306a36Sopenharmony_ci * response to this message. 24862306a36Sopenharmony_ci * 24962306a36Sopenharmony_ci * num_avail: Available memory in pages. 25062306a36Sopenharmony_ci * num_committed: Committed memory in pages. 25162306a36Sopenharmony_ci * page_file_size: The accumulated size of all page files 25262306a36Sopenharmony_ci * in the system in pages. 25362306a36Sopenharmony_ci * zero_free: The number of zero and free pages. 25462306a36Sopenharmony_ci * page_file_writes: The writes to the page file in pages. 25562306a36Sopenharmony_ci * io_diff: An indicator of file cache efficiency or page file activity, 25662306a36Sopenharmony_ci * calculated as File Cache Page Fault Count - Page Read Count. 25762306a36Sopenharmony_ci * This value is in pages. 25862306a36Sopenharmony_ci * 25962306a36Sopenharmony_ci * Some of these metrics are Windows specific and fortunately 26062306a36Sopenharmony_ci * the algorithm on the host side that computes the guest memory 26162306a36Sopenharmony_ci * pressure only uses num_committed value. 26262306a36Sopenharmony_ci */ 26362306a36Sopenharmony_ci 26462306a36Sopenharmony_cistruct dm_status { 26562306a36Sopenharmony_ci struct dm_header hdr; 26662306a36Sopenharmony_ci __u64 num_avail; 26762306a36Sopenharmony_ci __u64 num_committed; 26862306a36Sopenharmony_ci __u64 page_file_size; 26962306a36Sopenharmony_ci __u64 zero_free; 27062306a36Sopenharmony_ci __u32 page_file_writes; 27162306a36Sopenharmony_ci __u32 io_diff; 27262306a36Sopenharmony_ci} __packed; 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ci 27562306a36Sopenharmony_ci/* 27662306a36Sopenharmony_ci * Message to ask the guest to allocate memory - balloon up message. 27762306a36Sopenharmony_ci * This message is sent from the host to the guest. The guest may not be 27862306a36Sopenharmony_ci * able to allocate as much memory as requested. 27962306a36Sopenharmony_ci * 28062306a36Sopenharmony_ci * num_pages: number of pages to allocate. 28162306a36Sopenharmony_ci */ 28262306a36Sopenharmony_ci 28362306a36Sopenharmony_cistruct dm_balloon { 28462306a36Sopenharmony_ci struct dm_header hdr; 28562306a36Sopenharmony_ci __u32 num_pages; 28662306a36Sopenharmony_ci __u32 reservedz; 28762306a36Sopenharmony_ci} __packed; 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_ci/* 29162306a36Sopenharmony_ci * Balloon response message; this message is sent from the guest 29262306a36Sopenharmony_ci * to the host in response to the balloon message. 29362306a36Sopenharmony_ci * 29462306a36Sopenharmony_ci * reservedz: Reserved; must be set to zero. 29562306a36Sopenharmony_ci * more_pages: If FALSE, this is the last message of the transaction. 29662306a36Sopenharmony_ci * if TRUE there will atleast one more message from the guest. 29762306a36Sopenharmony_ci * 29862306a36Sopenharmony_ci * range_count: The number of ranges in the range array. 29962306a36Sopenharmony_ci * 30062306a36Sopenharmony_ci * range_array: An array of page ranges returned to the host. 30162306a36Sopenharmony_ci * 30262306a36Sopenharmony_ci */ 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_cistruct dm_balloon_response { 30562306a36Sopenharmony_ci struct dm_header hdr; 30662306a36Sopenharmony_ci __u32 reservedz; 30762306a36Sopenharmony_ci __u32 more_pages:1; 30862306a36Sopenharmony_ci __u32 range_count:31; 30962306a36Sopenharmony_ci union dm_mem_page_range range_array[]; 31062306a36Sopenharmony_ci} __packed; 31162306a36Sopenharmony_ci 31262306a36Sopenharmony_ci/* 31362306a36Sopenharmony_ci * Un-balloon message; this message is sent from the host 31462306a36Sopenharmony_ci * to the guest to give guest more memory. 31562306a36Sopenharmony_ci * 31662306a36Sopenharmony_ci * more_pages: If FALSE, this is the last message of the transaction. 31762306a36Sopenharmony_ci * if TRUE there will atleast one more message from the guest. 31862306a36Sopenharmony_ci * 31962306a36Sopenharmony_ci * reservedz: Reserved; must be set to zero. 32062306a36Sopenharmony_ci * 32162306a36Sopenharmony_ci * range_count: The number of ranges in the range array. 32262306a36Sopenharmony_ci * 32362306a36Sopenharmony_ci * range_array: An array of page ranges returned to the host. 32462306a36Sopenharmony_ci * 32562306a36Sopenharmony_ci */ 32662306a36Sopenharmony_ci 32762306a36Sopenharmony_cistruct dm_unballoon_request { 32862306a36Sopenharmony_ci struct dm_header hdr; 32962306a36Sopenharmony_ci __u32 more_pages:1; 33062306a36Sopenharmony_ci __u32 reservedz:31; 33162306a36Sopenharmony_ci __u32 range_count; 33262306a36Sopenharmony_ci union dm_mem_page_range range_array[]; 33362306a36Sopenharmony_ci} __packed; 33462306a36Sopenharmony_ci 33562306a36Sopenharmony_ci/* 33662306a36Sopenharmony_ci * Un-balloon response message; this message is sent from the guest 33762306a36Sopenharmony_ci * to the host in response to an unballoon request. 33862306a36Sopenharmony_ci * 33962306a36Sopenharmony_ci */ 34062306a36Sopenharmony_ci 34162306a36Sopenharmony_cistruct dm_unballoon_response { 34262306a36Sopenharmony_ci struct dm_header hdr; 34362306a36Sopenharmony_ci} __packed; 34462306a36Sopenharmony_ci 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci/* 34762306a36Sopenharmony_ci * Hot add request message. Message sent from the host to the guest. 34862306a36Sopenharmony_ci * 34962306a36Sopenharmony_ci * mem_range: Memory range to hot add. 35062306a36Sopenharmony_ci * 35162306a36Sopenharmony_ci */ 35262306a36Sopenharmony_ci 35362306a36Sopenharmony_cistruct dm_hot_add { 35462306a36Sopenharmony_ci struct dm_header hdr; 35562306a36Sopenharmony_ci union dm_mem_page_range range; 35662306a36Sopenharmony_ci} __packed; 35762306a36Sopenharmony_ci 35862306a36Sopenharmony_ci/* 35962306a36Sopenharmony_ci * Hot add response message. 36062306a36Sopenharmony_ci * This message is sent by the guest to report the status of a hot add request. 36162306a36Sopenharmony_ci * If page_count is less than the requested page count, then the host should 36262306a36Sopenharmony_ci * assume all further hot add requests will fail, since this indicates that 36362306a36Sopenharmony_ci * the guest has hit an upper physical memory barrier. 36462306a36Sopenharmony_ci * 36562306a36Sopenharmony_ci * Hot adds may also fail due to low resources; in this case, the guest must 36662306a36Sopenharmony_ci * not complete this message until the hot add can succeed, and the host must 36762306a36Sopenharmony_ci * not send a new hot add request until the response is sent. 36862306a36Sopenharmony_ci * If VSC fails to hot add memory DYNMEM_NUMBER_OF_UNSUCCESSFUL_HOTADD_ATTEMPTS 36962306a36Sopenharmony_ci * times it fails the request. 37062306a36Sopenharmony_ci * 37162306a36Sopenharmony_ci * 37262306a36Sopenharmony_ci * page_count: number of pages that were successfully hot added. 37362306a36Sopenharmony_ci * 37462306a36Sopenharmony_ci * result: result of the operation 1: success, 0: failure. 37562306a36Sopenharmony_ci * 37662306a36Sopenharmony_ci */ 37762306a36Sopenharmony_ci 37862306a36Sopenharmony_cistruct dm_hot_add_response { 37962306a36Sopenharmony_ci struct dm_header hdr; 38062306a36Sopenharmony_ci __u32 page_count; 38162306a36Sopenharmony_ci __u32 result; 38262306a36Sopenharmony_ci} __packed; 38362306a36Sopenharmony_ci 38462306a36Sopenharmony_ci/* 38562306a36Sopenharmony_ci * Types of information sent from host to the guest. 38662306a36Sopenharmony_ci */ 38762306a36Sopenharmony_ci 38862306a36Sopenharmony_cienum dm_info_type { 38962306a36Sopenharmony_ci INFO_TYPE_MAX_PAGE_CNT = 0, 39062306a36Sopenharmony_ci MAX_INFO_TYPE 39162306a36Sopenharmony_ci}; 39262306a36Sopenharmony_ci 39362306a36Sopenharmony_ci 39462306a36Sopenharmony_ci/* 39562306a36Sopenharmony_ci * Header for the information message. 39662306a36Sopenharmony_ci */ 39762306a36Sopenharmony_ci 39862306a36Sopenharmony_cistruct dm_info_header { 39962306a36Sopenharmony_ci enum dm_info_type type; 40062306a36Sopenharmony_ci __u32 data_size; 40162306a36Sopenharmony_ci} __packed; 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_ci/* 40462306a36Sopenharmony_ci * This message is sent from the host to the guest to pass 40562306a36Sopenharmony_ci * some relevant information (win8 addition). 40662306a36Sopenharmony_ci * 40762306a36Sopenharmony_ci * reserved: no used. 40862306a36Sopenharmony_ci * info_size: size of the information blob. 40962306a36Sopenharmony_ci * info: information blob. 41062306a36Sopenharmony_ci */ 41162306a36Sopenharmony_ci 41262306a36Sopenharmony_cistruct dm_info_msg { 41362306a36Sopenharmony_ci struct dm_header hdr; 41462306a36Sopenharmony_ci __u32 reserved; 41562306a36Sopenharmony_ci __u32 info_size; 41662306a36Sopenharmony_ci __u8 info[]; 41762306a36Sopenharmony_ci}; 41862306a36Sopenharmony_ci 41962306a36Sopenharmony_ci/* 42062306a36Sopenharmony_ci * End protocol definitions. 42162306a36Sopenharmony_ci */ 42262306a36Sopenharmony_ci 42362306a36Sopenharmony_ci/* 42462306a36Sopenharmony_ci * State to manage hot adding memory into the guest. 42562306a36Sopenharmony_ci * The range start_pfn : end_pfn specifies the range 42662306a36Sopenharmony_ci * that the host has asked us to hot add. The range 42762306a36Sopenharmony_ci * start_pfn : ha_end_pfn specifies the range that we have 42862306a36Sopenharmony_ci * currently hot added. We hot add in multiples of 128M 42962306a36Sopenharmony_ci * chunks; it is possible that we may not be able to bring 43062306a36Sopenharmony_ci * online all the pages in the region. The range 43162306a36Sopenharmony_ci * covered_start_pfn:covered_end_pfn defines the pages that can 43262306a36Sopenharmony_ci * be brough online. 43362306a36Sopenharmony_ci */ 43462306a36Sopenharmony_ci 43562306a36Sopenharmony_cistruct hv_hotadd_state { 43662306a36Sopenharmony_ci struct list_head list; 43762306a36Sopenharmony_ci unsigned long start_pfn; 43862306a36Sopenharmony_ci unsigned long covered_start_pfn; 43962306a36Sopenharmony_ci unsigned long covered_end_pfn; 44062306a36Sopenharmony_ci unsigned long ha_end_pfn; 44162306a36Sopenharmony_ci unsigned long end_pfn; 44262306a36Sopenharmony_ci /* 44362306a36Sopenharmony_ci * A list of gaps. 44462306a36Sopenharmony_ci */ 44562306a36Sopenharmony_ci struct list_head gap_list; 44662306a36Sopenharmony_ci}; 44762306a36Sopenharmony_ci 44862306a36Sopenharmony_cistruct hv_hotadd_gap { 44962306a36Sopenharmony_ci struct list_head list; 45062306a36Sopenharmony_ci unsigned long start_pfn; 45162306a36Sopenharmony_ci unsigned long end_pfn; 45262306a36Sopenharmony_ci}; 45362306a36Sopenharmony_ci 45462306a36Sopenharmony_cistruct balloon_state { 45562306a36Sopenharmony_ci __u32 num_pages; 45662306a36Sopenharmony_ci struct work_struct wrk; 45762306a36Sopenharmony_ci}; 45862306a36Sopenharmony_ci 45962306a36Sopenharmony_cistruct hot_add_wrk { 46062306a36Sopenharmony_ci union dm_mem_page_range ha_page_range; 46162306a36Sopenharmony_ci union dm_mem_page_range ha_region_range; 46262306a36Sopenharmony_ci struct work_struct wrk; 46362306a36Sopenharmony_ci}; 46462306a36Sopenharmony_ci 46562306a36Sopenharmony_cistatic bool allow_hibernation; 46662306a36Sopenharmony_cistatic bool hot_add = true; 46762306a36Sopenharmony_cistatic bool do_hot_add; 46862306a36Sopenharmony_ci/* 46962306a36Sopenharmony_ci * Delay reporting memory pressure by 47062306a36Sopenharmony_ci * the specified number of seconds. 47162306a36Sopenharmony_ci */ 47262306a36Sopenharmony_cistatic uint pressure_report_delay = 45; 47362306a36Sopenharmony_ciextern unsigned int page_reporting_order; 47462306a36Sopenharmony_ci#define HV_MAX_FAILURES 2 47562306a36Sopenharmony_ci 47662306a36Sopenharmony_ci/* 47762306a36Sopenharmony_ci * The last time we posted a pressure report to host. 47862306a36Sopenharmony_ci */ 47962306a36Sopenharmony_cistatic unsigned long last_post_time; 48062306a36Sopenharmony_ci 48162306a36Sopenharmony_cistatic int hv_hypercall_multi_failure; 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_cimodule_param(hot_add, bool, (S_IRUGO | S_IWUSR)); 48462306a36Sopenharmony_ciMODULE_PARM_DESC(hot_add, "If set attempt memory hot_add"); 48562306a36Sopenharmony_ci 48662306a36Sopenharmony_cimodule_param(pressure_report_delay, uint, (S_IRUGO | S_IWUSR)); 48762306a36Sopenharmony_ciMODULE_PARM_DESC(pressure_report_delay, "Delay in secs in reporting pressure"); 48862306a36Sopenharmony_cistatic atomic_t trans_id = ATOMIC_INIT(0); 48962306a36Sopenharmony_ci 49062306a36Sopenharmony_cistatic int dm_ring_size = VMBUS_RING_SIZE(16 * 1024); 49162306a36Sopenharmony_ci 49262306a36Sopenharmony_ci/* 49362306a36Sopenharmony_ci * Driver specific state. 49462306a36Sopenharmony_ci */ 49562306a36Sopenharmony_ci 49662306a36Sopenharmony_cienum hv_dm_state { 49762306a36Sopenharmony_ci DM_INITIALIZING = 0, 49862306a36Sopenharmony_ci DM_INITIALIZED, 49962306a36Sopenharmony_ci DM_BALLOON_UP, 50062306a36Sopenharmony_ci DM_BALLOON_DOWN, 50162306a36Sopenharmony_ci DM_HOT_ADD, 50262306a36Sopenharmony_ci DM_INIT_ERROR 50362306a36Sopenharmony_ci}; 50462306a36Sopenharmony_ci 50562306a36Sopenharmony_ci 50662306a36Sopenharmony_cistatic __u8 recv_buffer[HV_HYP_PAGE_SIZE]; 50762306a36Sopenharmony_cistatic __u8 balloon_up_send_buffer[HV_HYP_PAGE_SIZE]; 50862306a36Sopenharmony_ci#define PAGES_IN_2M (2 * 1024 * 1024 / PAGE_SIZE) 50962306a36Sopenharmony_ci#define HA_CHUNK (128 * 1024 * 1024 / PAGE_SIZE) 51062306a36Sopenharmony_ci 51162306a36Sopenharmony_cistruct hv_dynmem_device { 51262306a36Sopenharmony_ci struct hv_device *dev; 51362306a36Sopenharmony_ci enum hv_dm_state state; 51462306a36Sopenharmony_ci struct completion host_event; 51562306a36Sopenharmony_ci struct completion config_event; 51662306a36Sopenharmony_ci 51762306a36Sopenharmony_ci /* 51862306a36Sopenharmony_ci * Number of pages we have currently ballooned out. 51962306a36Sopenharmony_ci */ 52062306a36Sopenharmony_ci unsigned int num_pages_ballooned; 52162306a36Sopenharmony_ci unsigned int num_pages_onlined; 52262306a36Sopenharmony_ci unsigned int num_pages_added; 52362306a36Sopenharmony_ci 52462306a36Sopenharmony_ci /* 52562306a36Sopenharmony_ci * State to manage the ballooning (up) operation. 52662306a36Sopenharmony_ci */ 52762306a36Sopenharmony_ci struct balloon_state balloon_wrk; 52862306a36Sopenharmony_ci 52962306a36Sopenharmony_ci /* 53062306a36Sopenharmony_ci * State to execute the "hot-add" operation. 53162306a36Sopenharmony_ci */ 53262306a36Sopenharmony_ci struct hot_add_wrk ha_wrk; 53362306a36Sopenharmony_ci 53462306a36Sopenharmony_ci /* 53562306a36Sopenharmony_ci * This state tracks if the host has specified a hot-add 53662306a36Sopenharmony_ci * region. 53762306a36Sopenharmony_ci */ 53862306a36Sopenharmony_ci bool host_specified_ha_region; 53962306a36Sopenharmony_ci 54062306a36Sopenharmony_ci /* 54162306a36Sopenharmony_ci * State to synchronize hot-add. 54262306a36Sopenharmony_ci */ 54362306a36Sopenharmony_ci struct completion ol_waitevent; 54462306a36Sopenharmony_ci /* 54562306a36Sopenharmony_ci * This thread handles hot-add 54662306a36Sopenharmony_ci * requests from the host as well as notifying 54762306a36Sopenharmony_ci * the host with regards to memory pressure in 54862306a36Sopenharmony_ci * the guest. 54962306a36Sopenharmony_ci */ 55062306a36Sopenharmony_ci struct task_struct *thread; 55162306a36Sopenharmony_ci 55262306a36Sopenharmony_ci /* 55362306a36Sopenharmony_ci * Protects ha_region_list, num_pages_onlined counter and individual 55462306a36Sopenharmony_ci * regions from ha_region_list. 55562306a36Sopenharmony_ci */ 55662306a36Sopenharmony_ci spinlock_t ha_lock; 55762306a36Sopenharmony_ci 55862306a36Sopenharmony_ci /* 55962306a36Sopenharmony_ci * A list of hot-add regions. 56062306a36Sopenharmony_ci */ 56162306a36Sopenharmony_ci struct list_head ha_region_list; 56262306a36Sopenharmony_ci 56362306a36Sopenharmony_ci /* 56462306a36Sopenharmony_ci * We start with the highest version we can support 56562306a36Sopenharmony_ci * and downgrade based on the host; we save here the 56662306a36Sopenharmony_ci * next version to try. 56762306a36Sopenharmony_ci */ 56862306a36Sopenharmony_ci __u32 next_version; 56962306a36Sopenharmony_ci 57062306a36Sopenharmony_ci /* 57162306a36Sopenharmony_ci * The negotiated version agreed by host. 57262306a36Sopenharmony_ci */ 57362306a36Sopenharmony_ci __u32 version; 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_ci struct page_reporting_dev_info pr_dev_info; 57662306a36Sopenharmony_ci 57762306a36Sopenharmony_ci /* 57862306a36Sopenharmony_ci * Maximum number of pages that can be hot_add-ed 57962306a36Sopenharmony_ci */ 58062306a36Sopenharmony_ci __u64 max_dynamic_page_count; 58162306a36Sopenharmony_ci}; 58262306a36Sopenharmony_ci 58362306a36Sopenharmony_cistatic struct hv_dynmem_device dm_device; 58462306a36Sopenharmony_ci 58562306a36Sopenharmony_cistatic void post_status(struct hv_dynmem_device *dm); 58662306a36Sopenharmony_ci 58762306a36Sopenharmony_cistatic void enable_page_reporting(void); 58862306a36Sopenharmony_ci 58962306a36Sopenharmony_cistatic void disable_page_reporting(void); 59062306a36Sopenharmony_ci 59162306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTPLUG 59262306a36Sopenharmony_cistatic inline bool has_pfn_is_backed(struct hv_hotadd_state *has, 59362306a36Sopenharmony_ci unsigned long pfn) 59462306a36Sopenharmony_ci{ 59562306a36Sopenharmony_ci struct hv_hotadd_gap *gap; 59662306a36Sopenharmony_ci 59762306a36Sopenharmony_ci /* The page is not backed. */ 59862306a36Sopenharmony_ci if ((pfn < has->covered_start_pfn) || (pfn >= has->covered_end_pfn)) 59962306a36Sopenharmony_ci return false; 60062306a36Sopenharmony_ci 60162306a36Sopenharmony_ci /* Check for gaps. */ 60262306a36Sopenharmony_ci list_for_each_entry(gap, &has->gap_list, list) { 60362306a36Sopenharmony_ci if ((pfn >= gap->start_pfn) && (pfn < gap->end_pfn)) 60462306a36Sopenharmony_ci return false; 60562306a36Sopenharmony_ci } 60662306a36Sopenharmony_ci 60762306a36Sopenharmony_ci return true; 60862306a36Sopenharmony_ci} 60962306a36Sopenharmony_ci 61062306a36Sopenharmony_cistatic unsigned long hv_page_offline_check(unsigned long start_pfn, 61162306a36Sopenharmony_ci unsigned long nr_pages) 61262306a36Sopenharmony_ci{ 61362306a36Sopenharmony_ci unsigned long pfn = start_pfn, count = 0; 61462306a36Sopenharmony_ci struct hv_hotadd_state *has; 61562306a36Sopenharmony_ci bool found; 61662306a36Sopenharmony_ci 61762306a36Sopenharmony_ci while (pfn < start_pfn + nr_pages) { 61862306a36Sopenharmony_ci /* 61962306a36Sopenharmony_ci * Search for HAS which covers the pfn and when we find one 62062306a36Sopenharmony_ci * count how many consequitive PFNs are covered. 62162306a36Sopenharmony_ci */ 62262306a36Sopenharmony_ci found = false; 62362306a36Sopenharmony_ci list_for_each_entry(has, &dm_device.ha_region_list, list) { 62462306a36Sopenharmony_ci while ((pfn >= has->start_pfn) && 62562306a36Sopenharmony_ci (pfn < has->end_pfn) && 62662306a36Sopenharmony_ci (pfn < start_pfn + nr_pages)) { 62762306a36Sopenharmony_ci found = true; 62862306a36Sopenharmony_ci if (has_pfn_is_backed(has, pfn)) 62962306a36Sopenharmony_ci count++; 63062306a36Sopenharmony_ci pfn++; 63162306a36Sopenharmony_ci } 63262306a36Sopenharmony_ci } 63362306a36Sopenharmony_ci 63462306a36Sopenharmony_ci /* 63562306a36Sopenharmony_ci * This PFN is not in any HAS (e.g. we're offlining a region 63662306a36Sopenharmony_ci * which was present at boot), no need to account for it. Go 63762306a36Sopenharmony_ci * to the next one. 63862306a36Sopenharmony_ci */ 63962306a36Sopenharmony_ci if (!found) 64062306a36Sopenharmony_ci pfn++; 64162306a36Sopenharmony_ci } 64262306a36Sopenharmony_ci 64362306a36Sopenharmony_ci return count; 64462306a36Sopenharmony_ci} 64562306a36Sopenharmony_ci 64662306a36Sopenharmony_cistatic int hv_memory_notifier(struct notifier_block *nb, unsigned long val, 64762306a36Sopenharmony_ci void *v) 64862306a36Sopenharmony_ci{ 64962306a36Sopenharmony_ci struct memory_notify *mem = (struct memory_notify *)v; 65062306a36Sopenharmony_ci unsigned long pfn_count; 65162306a36Sopenharmony_ci 65262306a36Sopenharmony_ci switch (val) { 65362306a36Sopenharmony_ci case MEM_ONLINE: 65462306a36Sopenharmony_ci case MEM_CANCEL_ONLINE: 65562306a36Sopenharmony_ci complete(&dm_device.ol_waitevent); 65662306a36Sopenharmony_ci break; 65762306a36Sopenharmony_ci 65862306a36Sopenharmony_ci case MEM_OFFLINE: 65962306a36Sopenharmony_ci scoped_guard(spinlock_irqsave, &dm_device.ha_lock) { 66062306a36Sopenharmony_ci pfn_count = hv_page_offline_check(mem->start_pfn, 66162306a36Sopenharmony_ci mem->nr_pages); 66262306a36Sopenharmony_ci if (pfn_count <= dm_device.num_pages_onlined) { 66362306a36Sopenharmony_ci dm_device.num_pages_onlined -= pfn_count; 66462306a36Sopenharmony_ci } else { 66562306a36Sopenharmony_ci /* 66662306a36Sopenharmony_ci * We're offlining more pages than we 66762306a36Sopenharmony_ci * managed to online. This is 66862306a36Sopenharmony_ci * unexpected. In any case don't let 66962306a36Sopenharmony_ci * num_pages_onlined wrap around zero. 67062306a36Sopenharmony_ci */ 67162306a36Sopenharmony_ci WARN_ON_ONCE(1); 67262306a36Sopenharmony_ci dm_device.num_pages_onlined = 0; 67362306a36Sopenharmony_ci } 67462306a36Sopenharmony_ci } 67562306a36Sopenharmony_ci break; 67662306a36Sopenharmony_ci case MEM_GOING_ONLINE: 67762306a36Sopenharmony_ci case MEM_GOING_OFFLINE: 67862306a36Sopenharmony_ci case MEM_CANCEL_OFFLINE: 67962306a36Sopenharmony_ci break; 68062306a36Sopenharmony_ci } 68162306a36Sopenharmony_ci return NOTIFY_OK; 68262306a36Sopenharmony_ci} 68362306a36Sopenharmony_ci 68462306a36Sopenharmony_cistatic struct notifier_block hv_memory_nb = { 68562306a36Sopenharmony_ci .notifier_call = hv_memory_notifier, 68662306a36Sopenharmony_ci .priority = 0 68762306a36Sopenharmony_ci}; 68862306a36Sopenharmony_ci 68962306a36Sopenharmony_ci/* Check if the particular page is backed and can be onlined and online it. */ 69062306a36Sopenharmony_cistatic void hv_page_online_one(struct hv_hotadd_state *has, struct page *pg) 69162306a36Sopenharmony_ci{ 69262306a36Sopenharmony_ci if (!has_pfn_is_backed(has, page_to_pfn(pg))) { 69362306a36Sopenharmony_ci if (!PageOffline(pg)) 69462306a36Sopenharmony_ci __SetPageOffline(pg); 69562306a36Sopenharmony_ci return; 69662306a36Sopenharmony_ci } 69762306a36Sopenharmony_ci if (PageOffline(pg)) 69862306a36Sopenharmony_ci __ClearPageOffline(pg); 69962306a36Sopenharmony_ci 70062306a36Sopenharmony_ci /* This frame is currently backed; online the page. */ 70162306a36Sopenharmony_ci generic_online_page(pg, 0); 70262306a36Sopenharmony_ci 70362306a36Sopenharmony_ci lockdep_assert_held(&dm_device.ha_lock); 70462306a36Sopenharmony_ci dm_device.num_pages_onlined++; 70562306a36Sopenharmony_ci} 70662306a36Sopenharmony_ci 70762306a36Sopenharmony_cistatic void hv_bring_pgs_online(struct hv_hotadd_state *has, 70862306a36Sopenharmony_ci unsigned long start_pfn, unsigned long size) 70962306a36Sopenharmony_ci{ 71062306a36Sopenharmony_ci int i; 71162306a36Sopenharmony_ci 71262306a36Sopenharmony_ci pr_debug("Online %lu pages starting at pfn 0x%lx\n", size, start_pfn); 71362306a36Sopenharmony_ci for (i = 0; i < size; i++) 71462306a36Sopenharmony_ci hv_page_online_one(has, pfn_to_page(start_pfn + i)); 71562306a36Sopenharmony_ci} 71662306a36Sopenharmony_ci 71762306a36Sopenharmony_cistatic void hv_mem_hot_add(unsigned long start, unsigned long size, 71862306a36Sopenharmony_ci unsigned long pfn_count, 71962306a36Sopenharmony_ci struct hv_hotadd_state *has) 72062306a36Sopenharmony_ci{ 72162306a36Sopenharmony_ci int ret = 0; 72262306a36Sopenharmony_ci int i, nid; 72362306a36Sopenharmony_ci unsigned long start_pfn; 72462306a36Sopenharmony_ci unsigned long processed_pfn; 72562306a36Sopenharmony_ci unsigned long total_pfn = pfn_count; 72662306a36Sopenharmony_ci 72762306a36Sopenharmony_ci for (i = 0; i < (size/HA_CHUNK); i++) { 72862306a36Sopenharmony_ci start_pfn = start + (i * HA_CHUNK); 72962306a36Sopenharmony_ci 73062306a36Sopenharmony_ci scoped_guard(spinlock_irqsave, &dm_device.ha_lock) { 73162306a36Sopenharmony_ci has->ha_end_pfn += HA_CHUNK; 73262306a36Sopenharmony_ci 73362306a36Sopenharmony_ci if (total_pfn > HA_CHUNK) { 73462306a36Sopenharmony_ci processed_pfn = HA_CHUNK; 73562306a36Sopenharmony_ci total_pfn -= HA_CHUNK; 73662306a36Sopenharmony_ci } else { 73762306a36Sopenharmony_ci processed_pfn = total_pfn; 73862306a36Sopenharmony_ci total_pfn = 0; 73962306a36Sopenharmony_ci } 74062306a36Sopenharmony_ci 74162306a36Sopenharmony_ci has->covered_end_pfn += processed_pfn; 74262306a36Sopenharmony_ci } 74362306a36Sopenharmony_ci 74462306a36Sopenharmony_ci reinit_completion(&dm_device.ol_waitevent); 74562306a36Sopenharmony_ci 74662306a36Sopenharmony_ci nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn)); 74762306a36Sopenharmony_ci ret = add_memory(nid, PFN_PHYS((start_pfn)), 74862306a36Sopenharmony_ci (HA_CHUNK << PAGE_SHIFT), MHP_MERGE_RESOURCE); 74962306a36Sopenharmony_ci 75062306a36Sopenharmony_ci if (ret) { 75162306a36Sopenharmony_ci pr_err("hot_add memory failed error is %d\n", ret); 75262306a36Sopenharmony_ci if (ret == -EEXIST) { 75362306a36Sopenharmony_ci /* 75462306a36Sopenharmony_ci * This error indicates that the error 75562306a36Sopenharmony_ci * is not a transient failure. This is the 75662306a36Sopenharmony_ci * case where the guest's physical address map 75762306a36Sopenharmony_ci * precludes hot adding memory. Stop all further 75862306a36Sopenharmony_ci * memory hot-add. 75962306a36Sopenharmony_ci */ 76062306a36Sopenharmony_ci do_hot_add = false; 76162306a36Sopenharmony_ci } 76262306a36Sopenharmony_ci scoped_guard(spinlock_irqsave, &dm_device.ha_lock) { 76362306a36Sopenharmony_ci has->ha_end_pfn -= HA_CHUNK; 76462306a36Sopenharmony_ci has->covered_end_pfn -= processed_pfn; 76562306a36Sopenharmony_ci } 76662306a36Sopenharmony_ci break; 76762306a36Sopenharmony_ci } 76862306a36Sopenharmony_ci 76962306a36Sopenharmony_ci /* 77062306a36Sopenharmony_ci * Wait for memory to get onlined. If the kernel onlined the 77162306a36Sopenharmony_ci * memory when adding it, this will return directly. Otherwise, 77262306a36Sopenharmony_ci * it will wait for user space to online the memory. This helps 77362306a36Sopenharmony_ci * to avoid adding memory faster than it is getting onlined. As 77462306a36Sopenharmony_ci * adding succeeded, it is ok to proceed even if the memory was 77562306a36Sopenharmony_ci * not onlined in time. 77662306a36Sopenharmony_ci */ 77762306a36Sopenharmony_ci wait_for_completion_timeout(&dm_device.ol_waitevent, 5 * HZ); 77862306a36Sopenharmony_ci post_status(&dm_device); 77962306a36Sopenharmony_ci } 78062306a36Sopenharmony_ci} 78162306a36Sopenharmony_ci 78262306a36Sopenharmony_cistatic void hv_online_page(struct page *pg, unsigned int order) 78362306a36Sopenharmony_ci{ 78462306a36Sopenharmony_ci struct hv_hotadd_state *has; 78562306a36Sopenharmony_ci unsigned long pfn = page_to_pfn(pg); 78662306a36Sopenharmony_ci 78762306a36Sopenharmony_ci guard(spinlock_irqsave)(&dm_device.ha_lock); 78862306a36Sopenharmony_ci list_for_each_entry(has, &dm_device.ha_region_list, list) { 78962306a36Sopenharmony_ci /* The page belongs to a different HAS. */ 79062306a36Sopenharmony_ci if ((pfn < has->start_pfn) || 79162306a36Sopenharmony_ci (pfn + (1UL << order) > has->end_pfn)) 79262306a36Sopenharmony_ci continue; 79362306a36Sopenharmony_ci 79462306a36Sopenharmony_ci hv_bring_pgs_online(has, pfn, 1UL << order); 79562306a36Sopenharmony_ci break; 79662306a36Sopenharmony_ci } 79762306a36Sopenharmony_ci} 79862306a36Sopenharmony_ci 79962306a36Sopenharmony_cistatic int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt) 80062306a36Sopenharmony_ci{ 80162306a36Sopenharmony_ci struct hv_hotadd_state *has; 80262306a36Sopenharmony_ci struct hv_hotadd_gap *gap; 80362306a36Sopenharmony_ci unsigned long residual, new_inc; 80462306a36Sopenharmony_ci int ret = 0; 80562306a36Sopenharmony_ci 80662306a36Sopenharmony_ci guard(spinlock_irqsave)(&dm_device.ha_lock); 80762306a36Sopenharmony_ci list_for_each_entry(has, &dm_device.ha_region_list, list) { 80862306a36Sopenharmony_ci /* 80962306a36Sopenharmony_ci * If the pfn range we are dealing with is not in the current 81062306a36Sopenharmony_ci * "hot add block", move on. 81162306a36Sopenharmony_ci */ 81262306a36Sopenharmony_ci if (start_pfn < has->start_pfn || start_pfn >= has->end_pfn) 81362306a36Sopenharmony_ci continue; 81462306a36Sopenharmony_ci 81562306a36Sopenharmony_ci /* 81662306a36Sopenharmony_ci * If the current start pfn is not where the covered_end 81762306a36Sopenharmony_ci * is, create a gap and update covered_end_pfn. 81862306a36Sopenharmony_ci */ 81962306a36Sopenharmony_ci if (has->covered_end_pfn != start_pfn) { 82062306a36Sopenharmony_ci gap = kzalloc(sizeof(struct hv_hotadd_gap), GFP_ATOMIC); 82162306a36Sopenharmony_ci if (!gap) { 82262306a36Sopenharmony_ci ret = -ENOMEM; 82362306a36Sopenharmony_ci break; 82462306a36Sopenharmony_ci } 82562306a36Sopenharmony_ci 82662306a36Sopenharmony_ci INIT_LIST_HEAD(&gap->list); 82762306a36Sopenharmony_ci gap->start_pfn = has->covered_end_pfn; 82862306a36Sopenharmony_ci gap->end_pfn = start_pfn; 82962306a36Sopenharmony_ci list_add_tail(&gap->list, &has->gap_list); 83062306a36Sopenharmony_ci 83162306a36Sopenharmony_ci has->covered_end_pfn = start_pfn; 83262306a36Sopenharmony_ci } 83362306a36Sopenharmony_ci 83462306a36Sopenharmony_ci /* 83562306a36Sopenharmony_ci * If the current hot add-request extends beyond 83662306a36Sopenharmony_ci * our current limit; extend it. 83762306a36Sopenharmony_ci */ 83862306a36Sopenharmony_ci if ((start_pfn + pfn_cnt) > has->end_pfn) { 83962306a36Sopenharmony_ci residual = (start_pfn + pfn_cnt - has->end_pfn); 84062306a36Sopenharmony_ci /* 84162306a36Sopenharmony_ci * Extend the region by multiples of HA_CHUNK. 84262306a36Sopenharmony_ci */ 84362306a36Sopenharmony_ci new_inc = (residual / HA_CHUNK) * HA_CHUNK; 84462306a36Sopenharmony_ci if (residual % HA_CHUNK) 84562306a36Sopenharmony_ci new_inc += HA_CHUNK; 84662306a36Sopenharmony_ci 84762306a36Sopenharmony_ci has->end_pfn += new_inc; 84862306a36Sopenharmony_ci } 84962306a36Sopenharmony_ci 85062306a36Sopenharmony_ci ret = 1; 85162306a36Sopenharmony_ci break; 85262306a36Sopenharmony_ci } 85362306a36Sopenharmony_ci 85462306a36Sopenharmony_ci return ret; 85562306a36Sopenharmony_ci} 85662306a36Sopenharmony_ci 85762306a36Sopenharmony_cistatic unsigned long handle_pg_range(unsigned long pg_start, 85862306a36Sopenharmony_ci unsigned long pg_count) 85962306a36Sopenharmony_ci{ 86062306a36Sopenharmony_ci unsigned long start_pfn = pg_start; 86162306a36Sopenharmony_ci unsigned long pfn_cnt = pg_count; 86262306a36Sopenharmony_ci unsigned long size; 86362306a36Sopenharmony_ci struct hv_hotadd_state *has; 86462306a36Sopenharmony_ci unsigned long pgs_ol = 0; 86562306a36Sopenharmony_ci unsigned long old_covered_state; 86662306a36Sopenharmony_ci unsigned long res = 0, flags; 86762306a36Sopenharmony_ci 86862306a36Sopenharmony_ci pr_debug("Hot adding %lu pages starting at pfn 0x%lx.\n", pg_count, 86962306a36Sopenharmony_ci pg_start); 87062306a36Sopenharmony_ci 87162306a36Sopenharmony_ci spin_lock_irqsave(&dm_device.ha_lock, flags); 87262306a36Sopenharmony_ci list_for_each_entry(has, &dm_device.ha_region_list, list) { 87362306a36Sopenharmony_ci /* 87462306a36Sopenharmony_ci * If the pfn range we are dealing with is not in the current 87562306a36Sopenharmony_ci * "hot add block", move on. 87662306a36Sopenharmony_ci */ 87762306a36Sopenharmony_ci if (start_pfn < has->start_pfn || start_pfn >= has->end_pfn) 87862306a36Sopenharmony_ci continue; 87962306a36Sopenharmony_ci 88062306a36Sopenharmony_ci old_covered_state = has->covered_end_pfn; 88162306a36Sopenharmony_ci 88262306a36Sopenharmony_ci if (start_pfn < has->ha_end_pfn) { 88362306a36Sopenharmony_ci /* 88462306a36Sopenharmony_ci * This is the case where we are backing pages 88562306a36Sopenharmony_ci * in an already hot added region. Bring 88662306a36Sopenharmony_ci * these pages online first. 88762306a36Sopenharmony_ci */ 88862306a36Sopenharmony_ci pgs_ol = has->ha_end_pfn - start_pfn; 88962306a36Sopenharmony_ci if (pgs_ol > pfn_cnt) 89062306a36Sopenharmony_ci pgs_ol = pfn_cnt; 89162306a36Sopenharmony_ci 89262306a36Sopenharmony_ci has->covered_end_pfn += pgs_ol; 89362306a36Sopenharmony_ci pfn_cnt -= pgs_ol; 89462306a36Sopenharmony_ci /* 89562306a36Sopenharmony_ci * Check if the corresponding memory block is already 89662306a36Sopenharmony_ci * online. It is possible to observe struct pages still 89762306a36Sopenharmony_ci * being uninitialized here so check section instead. 89862306a36Sopenharmony_ci * In case the section is online we need to bring the 89962306a36Sopenharmony_ci * rest of pfns (which were not backed previously) 90062306a36Sopenharmony_ci * online too. 90162306a36Sopenharmony_ci */ 90262306a36Sopenharmony_ci if (start_pfn > has->start_pfn && 90362306a36Sopenharmony_ci online_section_nr(pfn_to_section_nr(start_pfn))) 90462306a36Sopenharmony_ci hv_bring_pgs_online(has, start_pfn, pgs_ol); 90562306a36Sopenharmony_ci 90662306a36Sopenharmony_ci } 90762306a36Sopenharmony_ci 90862306a36Sopenharmony_ci if ((has->ha_end_pfn < has->end_pfn) && (pfn_cnt > 0)) { 90962306a36Sopenharmony_ci /* 91062306a36Sopenharmony_ci * We have some residual hot add range 91162306a36Sopenharmony_ci * that needs to be hot added; hot add 91262306a36Sopenharmony_ci * it now. Hot add a multiple of 91362306a36Sopenharmony_ci * HA_CHUNK that fully covers the pages 91462306a36Sopenharmony_ci * we have. 91562306a36Sopenharmony_ci */ 91662306a36Sopenharmony_ci size = (has->end_pfn - has->ha_end_pfn); 91762306a36Sopenharmony_ci if (pfn_cnt <= size) { 91862306a36Sopenharmony_ci size = ((pfn_cnt / HA_CHUNK) * HA_CHUNK); 91962306a36Sopenharmony_ci if (pfn_cnt % HA_CHUNK) 92062306a36Sopenharmony_ci size += HA_CHUNK; 92162306a36Sopenharmony_ci } else { 92262306a36Sopenharmony_ci pfn_cnt = size; 92362306a36Sopenharmony_ci } 92462306a36Sopenharmony_ci spin_unlock_irqrestore(&dm_device.ha_lock, flags); 92562306a36Sopenharmony_ci hv_mem_hot_add(has->ha_end_pfn, size, pfn_cnt, has); 92662306a36Sopenharmony_ci spin_lock_irqsave(&dm_device.ha_lock, flags); 92762306a36Sopenharmony_ci } 92862306a36Sopenharmony_ci /* 92962306a36Sopenharmony_ci * If we managed to online any pages that were given to us, 93062306a36Sopenharmony_ci * we declare success. 93162306a36Sopenharmony_ci */ 93262306a36Sopenharmony_ci res = has->covered_end_pfn - old_covered_state; 93362306a36Sopenharmony_ci break; 93462306a36Sopenharmony_ci } 93562306a36Sopenharmony_ci spin_unlock_irqrestore(&dm_device.ha_lock, flags); 93662306a36Sopenharmony_ci 93762306a36Sopenharmony_ci return res; 93862306a36Sopenharmony_ci} 93962306a36Sopenharmony_ci 94062306a36Sopenharmony_cistatic unsigned long process_hot_add(unsigned long pg_start, 94162306a36Sopenharmony_ci unsigned long pfn_cnt, 94262306a36Sopenharmony_ci unsigned long rg_start, 94362306a36Sopenharmony_ci unsigned long rg_size) 94462306a36Sopenharmony_ci{ 94562306a36Sopenharmony_ci struct hv_hotadd_state *ha_region = NULL; 94662306a36Sopenharmony_ci int covered; 94762306a36Sopenharmony_ci 94862306a36Sopenharmony_ci if (pfn_cnt == 0) 94962306a36Sopenharmony_ci return 0; 95062306a36Sopenharmony_ci 95162306a36Sopenharmony_ci if (!dm_device.host_specified_ha_region) { 95262306a36Sopenharmony_ci covered = pfn_covered(pg_start, pfn_cnt); 95362306a36Sopenharmony_ci if (covered < 0) 95462306a36Sopenharmony_ci return 0; 95562306a36Sopenharmony_ci 95662306a36Sopenharmony_ci if (covered) 95762306a36Sopenharmony_ci goto do_pg_range; 95862306a36Sopenharmony_ci } 95962306a36Sopenharmony_ci 96062306a36Sopenharmony_ci /* 96162306a36Sopenharmony_ci * If the host has specified a hot-add range; deal with it first. 96262306a36Sopenharmony_ci */ 96362306a36Sopenharmony_ci 96462306a36Sopenharmony_ci if (rg_size != 0) { 96562306a36Sopenharmony_ci ha_region = kzalloc(sizeof(struct hv_hotadd_state), GFP_KERNEL); 96662306a36Sopenharmony_ci if (!ha_region) 96762306a36Sopenharmony_ci return 0; 96862306a36Sopenharmony_ci 96962306a36Sopenharmony_ci INIT_LIST_HEAD(&ha_region->list); 97062306a36Sopenharmony_ci INIT_LIST_HEAD(&ha_region->gap_list); 97162306a36Sopenharmony_ci 97262306a36Sopenharmony_ci ha_region->start_pfn = rg_start; 97362306a36Sopenharmony_ci ha_region->ha_end_pfn = rg_start; 97462306a36Sopenharmony_ci ha_region->covered_start_pfn = pg_start; 97562306a36Sopenharmony_ci ha_region->covered_end_pfn = pg_start; 97662306a36Sopenharmony_ci ha_region->end_pfn = rg_start + rg_size; 97762306a36Sopenharmony_ci 97862306a36Sopenharmony_ci scoped_guard(spinlock_irqsave, &dm_device.ha_lock) { 97962306a36Sopenharmony_ci list_add_tail(&ha_region->list, &dm_device.ha_region_list); 98062306a36Sopenharmony_ci } 98162306a36Sopenharmony_ci } 98262306a36Sopenharmony_ci 98362306a36Sopenharmony_cido_pg_range: 98462306a36Sopenharmony_ci /* 98562306a36Sopenharmony_ci * Process the page range specified; bringing them 98662306a36Sopenharmony_ci * online if possible. 98762306a36Sopenharmony_ci */ 98862306a36Sopenharmony_ci return handle_pg_range(pg_start, pfn_cnt); 98962306a36Sopenharmony_ci} 99062306a36Sopenharmony_ci 99162306a36Sopenharmony_ci#endif 99262306a36Sopenharmony_ci 99362306a36Sopenharmony_cistatic void hot_add_req(struct work_struct *dummy) 99462306a36Sopenharmony_ci{ 99562306a36Sopenharmony_ci struct dm_hot_add_response resp; 99662306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTPLUG 99762306a36Sopenharmony_ci unsigned long pg_start, pfn_cnt; 99862306a36Sopenharmony_ci unsigned long rg_start, rg_sz; 99962306a36Sopenharmony_ci#endif 100062306a36Sopenharmony_ci struct hv_dynmem_device *dm = &dm_device; 100162306a36Sopenharmony_ci 100262306a36Sopenharmony_ci memset(&resp, 0, sizeof(struct dm_hot_add_response)); 100362306a36Sopenharmony_ci resp.hdr.type = DM_MEM_HOT_ADD_RESPONSE; 100462306a36Sopenharmony_ci resp.hdr.size = sizeof(struct dm_hot_add_response); 100562306a36Sopenharmony_ci 100662306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTPLUG 100762306a36Sopenharmony_ci pg_start = dm->ha_wrk.ha_page_range.finfo.start_page; 100862306a36Sopenharmony_ci pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt; 100962306a36Sopenharmony_ci 101062306a36Sopenharmony_ci rg_start = dm->ha_wrk.ha_region_range.finfo.start_page; 101162306a36Sopenharmony_ci rg_sz = dm->ha_wrk.ha_region_range.finfo.page_cnt; 101262306a36Sopenharmony_ci 101362306a36Sopenharmony_ci if ((rg_start == 0) && (!dm->host_specified_ha_region)) { 101462306a36Sopenharmony_ci unsigned long region_size; 101562306a36Sopenharmony_ci unsigned long region_start; 101662306a36Sopenharmony_ci 101762306a36Sopenharmony_ci /* 101862306a36Sopenharmony_ci * The host has not specified the hot-add region. 101962306a36Sopenharmony_ci * Based on the hot-add page range being specified, 102062306a36Sopenharmony_ci * compute a hot-add region that can cover the pages 102162306a36Sopenharmony_ci * that need to be hot-added while ensuring the alignment 102262306a36Sopenharmony_ci * and size requirements of Linux as it relates to hot-add. 102362306a36Sopenharmony_ci */ 102462306a36Sopenharmony_ci region_size = (pfn_cnt / HA_CHUNK) * HA_CHUNK; 102562306a36Sopenharmony_ci if (pfn_cnt % HA_CHUNK) 102662306a36Sopenharmony_ci region_size += HA_CHUNK; 102762306a36Sopenharmony_ci 102862306a36Sopenharmony_ci region_start = (pg_start / HA_CHUNK) * HA_CHUNK; 102962306a36Sopenharmony_ci 103062306a36Sopenharmony_ci rg_start = region_start; 103162306a36Sopenharmony_ci rg_sz = region_size; 103262306a36Sopenharmony_ci } 103362306a36Sopenharmony_ci 103462306a36Sopenharmony_ci if (do_hot_add) 103562306a36Sopenharmony_ci resp.page_count = process_hot_add(pg_start, pfn_cnt, 103662306a36Sopenharmony_ci rg_start, rg_sz); 103762306a36Sopenharmony_ci 103862306a36Sopenharmony_ci dm->num_pages_added += resp.page_count; 103962306a36Sopenharmony_ci#endif 104062306a36Sopenharmony_ci /* 104162306a36Sopenharmony_ci * The result field of the response structure has the 104262306a36Sopenharmony_ci * following semantics: 104362306a36Sopenharmony_ci * 104462306a36Sopenharmony_ci * 1. If all or some pages hot-added: Guest should return success. 104562306a36Sopenharmony_ci * 104662306a36Sopenharmony_ci * 2. If no pages could be hot-added: 104762306a36Sopenharmony_ci * 104862306a36Sopenharmony_ci * If the guest returns success, then the host 104962306a36Sopenharmony_ci * will not attempt any further hot-add operations. This 105062306a36Sopenharmony_ci * signifies a permanent failure. 105162306a36Sopenharmony_ci * 105262306a36Sopenharmony_ci * If the guest returns failure, then this failure will be 105362306a36Sopenharmony_ci * treated as a transient failure and the host may retry the 105462306a36Sopenharmony_ci * hot-add operation after some delay. 105562306a36Sopenharmony_ci */ 105662306a36Sopenharmony_ci if (resp.page_count > 0) 105762306a36Sopenharmony_ci resp.result = 1; 105862306a36Sopenharmony_ci else if (!do_hot_add) 105962306a36Sopenharmony_ci resp.result = 1; 106062306a36Sopenharmony_ci else 106162306a36Sopenharmony_ci resp.result = 0; 106262306a36Sopenharmony_ci 106362306a36Sopenharmony_ci if (!do_hot_add || resp.page_count == 0) { 106462306a36Sopenharmony_ci if (!allow_hibernation) 106562306a36Sopenharmony_ci pr_err("Memory hot add failed\n"); 106662306a36Sopenharmony_ci else 106762306a36Sopenharmony_ci pr_info("Ignore hot-add request!\n"); 106862306a36Sopenharmony_ci } 106962306a36Sopenharmony_ci 107062306a36Sopenharmony_ci dm->state = DM_INITIALIZED; 107162306a36Sopenharmony_ci resp.hdr.trans_id = atomic_inc_return(&trans_id); 107262306a36Sopenharmony_ci vmbus_sendpacket(dm->dev->channel, &resp, 107362306a36Sopenharmony_ci sizeof(struct dm_hot_add_response), 107462306a36Sopenharmony_ci (unsigned long)NULL, 107562306a36Sopenharmony_ci VM_PKT_DATA_INBAND, 0); 107662306a36Sopenharmony_ci} 107762306a36Sopenharmony_ci 107862306a36Sopenharmony_cistatic void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg) 107962306a36Sopenharmony_ci{ 108062306a36Sopenharmony_ci struct dm_info_header *info_hdr; 108162306a36Sopenharmony_ci 108262306a36Sopenharmony_ci info_hdr = (struct dm_info_header *)msg->info; 108362306a36Sopenharmony_ci 108462306a36Sopenharmony_ci switch (info_hdr->type) { 108562306a36Sopenharmony_ci case INFO_TYPE_MAX_PAGE_CNT: 108662306a36Sopenharmony_ci if (info_hdr->data_size == sizeof(__u64)) { 108762306a36Sopenharmony_ci __u64 *max_page_count = (__u64 *)&info_hdr[1]; 108862306a36Sopenharmony_ci 108962306a36Sopenharmony_ci pr_info("Max. dynamic memory size: %llu MB\n", 109062306a36Sopenharmony_ci (*max_page_count) >> (20 - HV_HYP_PAGE_SHIFT)); 109162306a36Sopenharmony_ci dm->max_dynamic_page_count = *max_page_count; 109262306a36Sopenharmony_ci } 109362306a36Sopenharmony_ci 109462306a36Sopenharmony_ci break; 109562306a36Sopenharmony_ci default: 109662306a36Sopenharmony_ci pr_warn("Received Unknown type: %d\n", info_hdr->type); 109762306a36Sopenharmony_ci } 109862306a36Sopenharmony_ci} 109962306a36Sopenharmony_ci 110062306a36Sopenharmony_cistatic unsigned long compute_balloon_floor(void) 110162306a36Sopenharmony_ci{ 110262306a36Sopenharmony_ci unsigned long min_pages; 110362306a36Sopenharmony_ci unsigned long nr_pages = totalram_pages(); 110462306a36Sopenharmony_ci#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) 110562306a36Sopenharmony_ci /* Simple continuous piecewiese linear function: 110662306a36Sopenharmony_ci * max MiB -> min MiB gradient 110762306a36Sopenharmony_ci * 0 0 110862306a36Sopenharmony_ci * 16 16 110962306a36Sopenharmony_ci * 32 24 111062306a36Sopenharmony_ci * 128 72 (1/2) 111162306a36Sopenharmony_ci * 512 168 (1/4) 111262306a36Sopenharmony_ci * 2048 360 (1/8) 111362306a36Sopenharmony_ci * 8192 744 (1/16) 111462306a36Sopenharmony_ci * 32768 1512 (1/32) 111562306a36Sopenharmony_ci */ 111662306a36Sopenharmony_ci if (nr_pages < MB2PAGES(128)) 111762306a36Sopenharmony_ci min_pages = MB2PAGES(8) + (nr_pages >> 1); 111862306a36Sopenharmony_ci else if (nr_pages < MB2PAGES(512)) 111962306a36Sopenharmony_ci min_pages = MB2PAGES(40) + (nr_pages >> 2); 112062306a36Sopenharmony_ci else if (nr_pages < MB2PAGES(2048)) 112162306a36Sopenharmony_ci min_pages = MB2PAGES(104) + (nr_pages >> 3); 112262306a36Sopenharmony_ci else if (nr_pages < MB2PAGES(8192)) 112362306a36Sopenharmony_ci min_pages = MB2PAGES(232) + (nr_pages >> 4); 112462306a36Sopenharmony_ci else 112562306a36Sopenharmony_ci min_pages = MB2PAGES(488) + (nr_pages >> 5); 112662306a36Sopenharmony_ci#undef MB2PAGES 112762306a36Sopenharmony_ci return min_pages; 112862306a36Sopenharmony_ci} 112962306a36Sopenharmony_ci 113062306a36Sopenharmony_ci/* 113162306a36Sopenharmony_ci * Compute total committed memory pages 113262306a36Sopenharmony_ci */ 113362306a36Sopenharmony_ci 113462306a36Sopenharmony_cistatic unsigned long get_pages_committed(struct hv_dynmem_device *dm) 113562306a36Sopenharmony_ci{ 113662306a36Sopenharmony_ci return vm_memory_committed() + 113762306a36Sopenharmony_ci dm->num_pages_ballooned + 113862306a36Sopenharmony_ci (dm->num_pages_added > dm->num_pages_onlined ? 113962306a36Sopenharmony_ci dm->num_pages_added - dm->num_pages_onlined : 0) + 114062306a36Sopenharmony_ci compute_balloon_floor(); 114162306a36Sopenharmony_ci} 114262306a36Sopenharmony_ci 114362306a36Sopenharmony_ci/* 114462306a36Sopenharmony_ci * Post our status as it relates memory pressure to the 114562306a36Sopenharmony_ci * host. Host expects the guests to post this status 114662306a36Sopenharmony_ci * periodically at 1 second intervals. 114762306a36Sopenharmony_ci * 114862306a36Sopenharmony_ci * The metrics specified in this protocol are very Windows 114962306a36Sopenharmony_ci * specific and so we cook up numbers here to convey our memory 115062306a36Sopenharmony_ci * pressure. 115162306a36Sopenharmony_ci */ 115262306a36Sopenharmony_ci 115362306a36Sopenharmony_cistatic void post_status(struct hv_dynmem_device *dm) 115462306a36Sopenharmony_ci{ 115562306a36Sopenharmony_ci struct dm_status status; 115662306a36Sopenharmony_ci unsigned long now = jiffies; 115762306a36Sopenharmony_ci unsigned long last_post = last_post_time; 115862306a36Sopenharmony_ci unsigned long num_pages_avail, num_pages_committed; 115962306a36Sopenharmony_ci 116062306a36Sopenharmony_ci if (pressure_report_delay > 0) { 116162306a36Sopenharmony_ci --pressure_report_delay; 116262306a36Sopenharmony_ci return; 116362306a36Sopenharmony_ci } 116462306a36Sopenharmony_ci 116562306a36Sopenharmony_ci if (!time_after(now, (last_post_time + HZ))) 116662306a36Sopenharmony_ci return; 116762306a36Sopenharmony_ci 116862306a36Sopenharmony_ci memset(&status, 0, sizeof(struct dm_status)); 116962306a36Sopenharmony_ci status.hdr.type = DM_STATUS_REPORT; 117062306a36Sopenharmony_ci status.hdr.size = sizeof(struct dm_status); 117162306a36Sopenharmony_ci status.hdr.trans_id = atomic_inc_return(&trans_id); 117262306a36Sopenharmony_ci 117362306a36Sopenharmony_ci /* 117462306a36Sopenharmony_ci * The host expects the guest to report free and committed memory. 117562306a36Sopenharmony_ci * Furthermore, the host expects the pressure information to include 117662306a36Sopenharmony_ci * the ballooned out pages. For a given amount of memory that we are 117762306a36Sopenharmony_ci * managing we need to compute a floor below which we should not 117862306a36Sopenharmony_ci * balloon. Compute this and add it to the pressure report. 117962306a36Sopenharmony_ci * We also need to report all offline pages (num_pages_added - 118062306a36Sopenharmony_ci * num_pages_onlined) as committed to the host, otherwise it can try 118162306a36Sopenharmony_ci * asking us to balloon them out. 118262306a36Sopenharmony_ci */ 118362306a36Sopenharmony_ci num_pages_avail = si_mem_available(); 118462306a36Sopenharmony_ci num_pages_committed = get_pages_committed(dm); 118562306a36Sopenharmony_ci 118662306a36Sopenharmony_ci trace_balloon_status(num_pages_avail, num_pages_committed, 118762306a36Sopenharmony_ci vm_memory_committed(), dm->num_pages_ballooned, 118862306a36Sopenharmony_ci dm->num_pages_added, dm->num_pages_onlined); 118962306a36Sopenharmony_ci 119062306a36Sopenharmony_ci /* Convert numbers of pages into numbers of HV_HYP_PAGEs. */ 119162306a36Sopenharmony_ci status.num_avail = num_pages_avail * NR_HV_HYP_PAGES_IN_PAGE; 119262306a36Sopenharmony_ci status.num_committed = num_pages_committed * NR_HV_HYP_PAGES_IN_PAGE; 119362306a36Sopenharmony_ci 119462306a36Sopenharmony_ci /* 119562306a36Sopenharmony_ci * If our transaction ID is no longer current, just don't 119662306a36Sopenharmony_ci * send the status. This can happen if we were interrupted 119762306a36Sopenharmony_ci * after we picked our transaction ID. 119862306a36Sopenharmony_ci */ 119962306a36Sopenharmony_ci if (status.hdr.trans_id != atomic_read(&trans_id)) 120062306a36Sopenharmony_ci return; 120162306a36Sopenharmony_ci 120262306a36Sopenharmony_ci /* 120362306a36Sopenharmony_ci * If the last post time that we sampled has changed, 120462306a36Sopenharmony_ci * we have raced, don't post the status. 120562306a36Sopenharmony_ci */ 120662306a36Sopenharmony_ci if (last_post != last_post_time) 120762306a36Sopenharmony_ci return; 120862306a36Sopenharmony_ci 120962306a36Sopenharmony_ci last_post_time = jiffies; 121062306a36Sopenharmony_ci vmbus_sendpacket(dm->dev->channel, &status, 121162306a36Sopenharmony_ci sizeof(struct dm_status), 121262306a36Sopenharmony_ci (unsigned long)NULL, 121362306a36Sopenharmony_ci VM_PKT_DATA_INBAND, 0); 121462306a36Sopenharmony_ci 121562306a36Sopenharmony_ci} 121662306a36Sopenharmony_ci 121762306a36Sopenharmony_cistatic void free_balloon_pages(struct hv_dynmem_device *dm, 121862306a36Sopenharmony_ci union dm_mem_page_range *range_array) 121962306a36Sopenharmony_ci{ 122062306a36Sopenharmony_ci int num_pages = range_array->finfo.page_cnt; 122162306a36Sopenharmony_ci __u64 start_frame = range_array->finfo.start_page; 122262306a36Sopenharmony_ci struct page *pg; 122362306a36Sopenharmony_ci int i; 122462306a36Sopenharmony_ci 122562306a36Sopenharmony_ci for (i = 0; i < num_pages; i++) { 122662306a36Sopenharmony_ci pg = pfn_to_page(i + start_frame); 122762306a36Sopenharmony_ci __ClearPageOffline(pg); 122862306a36Sopenharmony_ci __free_page(pg); 122962306a36Sopenharmony_ci dm->num_pages_ballooned--; 123062306a36Sopenharmony_ci adjust_managed_page_count(pg, 1); 123162306a36Sopenharmony_ci } 123262306a36Sopenharmony_ci} 123362306a36Sopenharmony_ci 123462306a36Sopenharmony_ci 123562306a36Sopenharmony_ci 123662306a36Sopenharmony_cistatic unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm, 123762306a36Sopenharmony_ci unsigned int num_pages, 123862306a36Sopenharmony_ci struct dm_balloon_response *bl_resp, 123962306a36Sopenharmony_ci int alloc_unit) 124062306a36Sopenharmony_ci{ 124162306a36Sopenharmony_ci unsigned int i, j; 124262306a36Sopenharmony_ci struct page *pg; 124362306a36Sopenharmony_ci 124462306a36Sopenharmony_ci for (i = 0; i < num_pages / alloc_unit; i++) { 124562306a36Sopenharmony_ci if (bl_resp->hdr.size + sizeof(union dm_mem_page_range) > 124662306a36Sopenharmony_ci HV_HYP_PAGE_SIZE) 124762306a36Sopenharmony_ci return i * alloc_unit; 124862306a36Sopenharmony_ci 124962306a36Sopenharmony_ci /* 125062306a36Sopenharmony_ci * We execute this code in a thread context. Furthermore, 125162306a36Sopenharmony_ci * we don't want the kernel to try too hard. 125262306a36Sopenharmony_ci */ 125362306a36Sopenharmony_ci pg = alloc_pages(GFP_HIGHUSER | __GFP_NORETRY | 125462306a36Sopenharmony_ci __GFP_NOMEMALLOC | __GFP_NOWARN, 125562306a36Sopenharmony_ci get_order(alloc_unit << PAGE_SHIFT)); 125662306a36Sopenharmony_ci 125762306a36Sopenharmony_ci if (!pg) 125862306a36Sopenharmony_ci return i * alloc_unit; 125962306a36Sopenharmony_ci 126062306a36Sopenharmony_ci dm->num_pages_ballooned += alloc_unit; 126162306a36Sopenharmony_ci 126262306a36Sopenharmony_ci /* 126362306a36Sopenharmony_ci * If we allocatted 2M pages; split them so we 126462306a36Sopenharmony_ci * can free them in any order we get. 126562306a36Sopenharmony_ci */ 126662306a36Sopenharmony_ci 126762306a36Sopenharmony_ci if (alloc_unit != 1) 126862306a36Sopenharmony_ci split_page(pg, get_order(alloc_unit << PAGE_SHIFT)); 126962306a36Sopenharmony_ci 127062306a36Sopenharmony_ci /* mark all pages offline */ 127162306a36Sopenharmony_ci for (j = 0; j < alloc_unit; j++) { 127262306a36Sopenharmony_ci __SetPageOffline(pg + j); 127362306a36Sopenharmony_ci adjust_managed_page_count(pg + j, -1); 127462306a36Sopenharmony_ci } 127562306a36Sopenharmony_ci 127662306a36Sopenharmony_ci bl_resp->range_count++; 127762306a36Sopenharmony_ci bl_resp->range_array[i].finfo.start_page = 127862306a36Sopenharmony_ci page_to_pfn(pg); 127962306a36Sopenharmony_ci bl_resp->range_array[i].finfo.page_cnt = alloc_unit; 128062306a36Sopenharmony_ci bl_resp->hdr.size += sizeof(union dm_mem_page_range); 128162306a36Sopenharmony_ci 128262306a36Sopenharmony_ci } 128362306a36Sopenharmony_ci 128462306a36Sopenharmony_ci return i * alloc_unit; 128562306a36Sopenharmony_ci} 128662306a36Sopenharmony_ci 128762306a36Sopenharmony_cistatic void balloon_up(struct work_struct *dummy) 128862306a36Sopenharmony_ci{ 128962306a36Sopenharmony_ci unsigned int num_pages = dm_device.balloon_wrk.num_pages; 129062306a36Sopenharmony_ci unsigned int num_ballooned = 0; 129162306a36Sopenharmony_ci struct dm_balloon_response *bl_resp; 129262306a36Sopenharmony_ci int alloc_unit; 129362306a36Sopenharmony_ci int ret; 129462306a36Sopenharmony_ci bool done = false; 129562306a36Sopenharmony_ci int i; 129662306a36Sopenharmony_ci long avail_pages; 129762306a36Sopenharmony_ci unsigned long floor; 129862306a36Sopenharmony_ci 129962306a36Sopenharmony_ci /* 130062306a36Sopenharmony_ci * We will attempt 2M allocations. However, if we fail to 130162306a36Sopenharmony_ci * allocate 2M chunks, we will go back to PAGE_SIZE allocations. 130262306a36Sopenharmony_ci */ 130362306a36Sopenharmony_ci alloc_unit = PAGES_IN_2M; 130462306a36Sopenharmony_ci 130562306a36Sopenharmony_ci avail_pages = si_mem_available(); 130662306a36Sopenharmony_ci floor = compute_balloon_floor(); 130762306a36Sopenharmony_ci 130862306a36Sopenharmony_ci /* Refuse to balloon below the floor. */ 130962306a36Sopenharmony_ci if (avail_pages < num_pages || avail_pages - num_pages < floor) { 131062306a36Sopenharmony_ci pr_info("Balloon request will be partially fulfilled. %s\n", 131162306a36Sopenharmony_ci avail_pages < num_pages ? "Not enough memory." : 131262306a36Sopenharmony_ci "Balloon floor reached."); 131362306a36Sopenharmony_ci 131462306a36Sopenharmony_ci num_pages = avail_pages > floor ? (avail_pages - floor) : 0; 131562306a36Sopenharmony_ci } 131662306a36Sopenharmony_ci 131762306a36Sopenharmony_ci while (!done) { 131862306a36Sopenharmony_ci memset(balloon_up_send_buffer, 0, HV_HYP_PAGE_SIZE); 131962306a36Sopenharmony_ci bl_resp = (struct dm_balloon_response *)balloon_up_send_buffer; 132062306a36Sopenharmony_ci bl_resp->hdr.type = DM_BALLOON_RESPONSE; 132162306a36Sopenharmony_ci bl_resp->hdr.size = sizeof(struct dm_balloon_response); 132262306a36Sopenharmony_ci bl_resp->more_pages = 1; 132362306a36Sopenharmony_ci 132462306a36Sopenharmony_ci num_pages -= num_ballooned; 132562306a36Sopenharmony_ci num_ballooned = alloc_balloon_pages(&dm_device, num_pages, 132662306a36Sopenharmony_ci bl_resp, alloc_unit); 132762306a36Sopenharmony_ci 132862306a36Sopenharmony_ci if (alloc_unit != 1 && num_ballooned == 0) { 132962306a36Sopenharmony_ci alloc_unit = 1; 133062306a36Sopenharmony_ci continue; 133162306a36Sopenharmony_ci } 133262306a36Sopenharmony_ci 133362306a36Sopenharmony_ci if (num_ballooned == 0 || num_ballooned == num_pages) { 133462306a36Sopenharmony_ci pr_debug("Ballooned %u out of %u requested pages.\n", 133562306a36Sopenharmony_ci num_pages, dm_device.balloon_wrk.num_pages); 133662306a36Sopenharmony_ci 133762306a36Sopenharmony_ci bl_resp->more_pages = 0; 133862306a36Sopenharmony_ci done = true; 133962306a36Sopenharmony_ci dm_device.state = DM_INITIALIZED; 134062306a36Sopenharmony_ci } 134162306a36Sopenharmony_ci 134262306a36Sopenharmony_ci /* 134362306a36Sopenharmony_ci * We are pushing a lot of data through the channel; 134462306a36Sopenharmony_ci * deal with transient failures caused because of the 134562306a36Sopenharmony_ci * lack of space in the ring buffer. 134662306a36Sopenharmony_ci */ 134762306a36Sopenharmony_ci 134862306a36Sopenharmony_ci do { 134962306a36Sopenharmony_ci bl_resp->hdr.trans_id = atomic_inc_return(&trans_id); 135062306a36Sopenharmony_ci ret = vmbus_sendpacket(dm_device.dev->channel, 135162306a36Sopenharmony_ci bl_resp, 135262306a36Sopenharmony_ci bl_resp->hdr.size, 135362306a36Sopenharmony_ci (unsigned long)NULL, 135462306a36Sopenharmony_ci VM_PKT_DATA_INBAND, 0); 135562306a36Sopenharmony_ci 135662306a36Sopenharmony_ci if (ret == -EAGAIN) 135762306a36Sopenharmony_ci msleep(20); 135862306a36Sopenharmony_ci post_status(&dm_device); 135962306a36Sopenharmony_ci } while (ret == -EAGAIN); 136062306a36Sopenharmony_ci 136162306a36Sopenharmony_ci if (ret) { 136262306a36Sopenharmony_ci /* 136362306a36Sopenharmony_ci * Free up the memory we allocatted. 136462306a36Sopenharmony_ci */ 136562306a36Sopenharmony_ci pr_err("Balloon response failed\n"); 136662306a36Sopenharmony_ci 136762306a36Sopenharmony_ci for (i = 0; i < bl_resp->range_count; i++) 136862306a36Sopenharmony_ci free_balloon_pages(&dm_device, 136962306a36Sopenharmony_ci &bl_resp->range_array[i]); 137062306a36Sopenharmony_ci 137162306a36Sopenharmony_ci done = true; 137262306a36Sopenharmony_ci } 137362306a36Sopenharmony_ci } 137462306a36Sopenharmony_ci 137562306a36Sopenharmony_ci} 137662306a36Sopenharmony_ci 137762306a36Sopenharmony_cistatic void balloon_down(struct hv_dynmem_device *dm, 137862306a36Sopenharmony_ci struct dm_unballoon_request *req) 137962306a36Sopenharmony_ci{ 138062306a36Sopenharmony_ci union dm_mem_page_range *range_array = req->range_array; 138162306a36Sopenharmony_ci int range_count = req->range_count; 138262306a36Sopenharmony_ci struct dm_unballoon_response resp; 138362306a36Sopenharmony_ci int i; 138462306a36Sopenharmony_ci unsigned int prev_pages_ballooned = dm->num_pages_ballooned; 138562306a36Sopenharmony_ci 138662306a36Sopenharmony_ci for (i = 0; i < range_count; i++) { 138762306a36Sopenharmony_ci free_balloon_pages(dm, &range_array[i]); 138862306a36Sopenharmony_ci complete(&dm_device.config_event); 138962306a36Sopenharmony_ci } 139062306a36Sopenharmony_ci 139162306a36Sopenharmony_ci pr_debug("Freed %u ballooned pages.\n", 139262306a36Sopenharmony_ci prev_pages_ballooned - dm->num_pages_ballooned); 139362306a36Sopenharmony_ci 139462306a36Sopenharmony_ci if (req->more_pages == 1) 139562306a36Sopenharmony_ci return; 139662306a36Sopenharmony_ci 139762306a36Sopenharmony_ci memset(&resp, 0, sizeof(struct dm_unballoon_response)); 139862306a36Sopenharmony_ci resp.hdr.type = DM_UNBALLOON_RESPONSE; 139962306a36Sopenharmony_ci resp.hdr.trans_id = atomic_inc_return(&trans_id); 140062306a36Sopenharmony_ci resp.hdr.size = sizeof(struct dm_unballoon_response); 140162306a36Sopenharmony_ci 140262306a36Sopenharmony_ci vmbus_sendpacket(dm_device.dev->channel, &resp, 140362306a36Sopenharmony_ci sizeof(struct dm_unballoon_response), 140462306a36Sopenharmony_ci (unsigned long)NULL, 140562306a36Sopenharmony_ci VM_PKT_DATA_INBAND, 0); 140662306a36Sopenharmony_ci 140762306a36Sopenharmony_ci dm->state = DM_INITIALIZED; 140862306a36Sopenharmony_ci} 140962306a36Sopenharmony_ci 141062306a36Sopenharmony_cistatic void balloon_onchannelcallback(void *context); 141162306a36Sopenharmony_ci 141262306a36Sopenharmony_cistatic int dm_thread_func(void *dm_dev) 141362306a36Sopenharmony_ci{ 141462306a36Sopenharmony_ci struct hv_dynmem_device *dm = dm_dev; 141562306a36Sopenharmony_ci 141662306a36Sopenharmony_ci while (!kthread_should_stop()) { 141762306a36Sopenharmony_ci wait_for_completion_interruptible_timeout( 141862306a36Sopenharmony_ci &dm_device.config_event, 1*HZ); 141962306a36Sopenharmony_ci /* 142062306a36Sopenharmony_ci * The host expects us to post information on the memory 142162306a36Sopenharmony_ci * pressure every second. 142262306a36Sopenharmony_ci */ 142362306a36Sopenharmony_ci reinit_completion(&dm_device.config_event); 142462306a36Sopenharmony_ci post_status(dm); 142562306a36Sopenharmony_ci /* 142662306a36Sopenharmony_ci * disable free page reporting if multiple hypercall 142762306a36Sopenharmony_ci * failure flag set. It is not done in the page_reporting 142862306a36Sopenharmony_ci * callback context as that causes a deadlock between 142962306a36Sopenharmony_ci * page_reporting_process() and page_reporting_unregister() 143062306a36Sopenharmony_ci */ 143162306a36Sopenharmony_ci if (hv_hypercall_multi_failure >= HV_MAX_FAILURES) { 143262306a36Sopenharmony_ci pr_err("Multiple failures in cold memory discard hypercall, disabling page reporting\n"); 143362306a36Sopenharmony_ci disable_page_reporting(); 143462306a36Sopenharmony_ci /* Reset the flag after disabling reporting */ 143562306a36Sopenharmony_ci hv_hypercall_multi_failure = 0; 143662306a36Sopenharmony_ci } 143762306a36Sopenharmony_ci } 143862306a36Sopenharmony_ci 143962306a36Sopenharmony_ci return 0; 144062306a36Sopenharmony_ci} 144162306a36Sopenharmony_ci 144262306a36Sopenharmony_ci 144362306a36Sopenharmony_cistatic void version_resp(struct hv_dynmem_device *dm, 144462306a36Sopenharmony_ci struct dm_version_response *vresp) 144562306a36Sopenharmony_ci{ 144662306a36Sopenharmony_ci struct dm_version_request version_req; 144762306a36Sopenharmony_ci int ret; 144862306a36Sopenharmony_ci 144962306a36Sopenharmony_ci if (vresp->is_accepted) { 145062306a36Sopenharmony_ci /* 145162306a36Sopenharmony_ci * We are done; wakeup the 145262306a36Sopenharmony_ci * context waiting for version 145362306a36Sopenharmony_ci * negotiation. 145462306a36Sopenharmony_ci */ 145562306a36Sopenharmony_ci complete(&dm->host_event); 145662306a36Sopenharmony_ci return; 145762306a36Sopenharmony_ci } 145862306a36Sopenharmony_ci /* 145962306a36Sopenharmony_ci * If there are more versions to try, continue 146062306a36Sopenharmony_ci * with negotiations; if not 146162306a36Sopenharmony_ci * shutdown the service since we are not able 146262306a36Sopenharmony_ci * to negotiate a suitable version number 146362306a36Sopenharmony_ci * with the host. 146462306a36Sopenharmony_ci */ 146562306a36Sopenharmony_ci if (dm->next_version == 0) 146662306a36Sopenharmony_ci goto version_error; 146762306a36Sopenharmony_ci 146862306a36Sopenharmony_ci memset(&version_req, 0, sizeof(struct dm_version_request)); 146962306a36Sopenharmony_ci version_req.hdr.type = DM_VERSION_REQUEST; 147062306a36Sopenharmony_ci version_req.hdr.size = sizeof(struct dm_version_request); 147162306a36Sopenharmony_ci version_req.hdr.trans_id = atomic_inc_return(&trans_id); 147262306a36Sopenharmony_ci version_req.version.version = dm->next_version; 147362306a36Sopenharmony_ci dm->version = version_req.version.version; 147462306a36Sopenharmony_ci 147562306a36Sopenharmony_ci /* 147662306a36Sopenharmony_ci * Set the next version to try in case current version fails. 147762306a36Sopenharmony_ci * Win7 protocol ought to be the last one to try. 147862306a36Sopenharmony_ci */ 147962306a36Sopenharmony_ci switch (version_req.version.version) { 148062306a36Sopenharmony_ci case DYNMEM_PROTOCOL_VERSION_WIN8: 148162306a36Sopenharmony_ci dm->next_version = DYNMEM_PROTOCOL_VERSION_WIN7; 148262306a36Sopenharmony_ci version_req.is_last_attempt = 0; 148362306a36Sopenharmony_ci break; 148462306a36Sopenharmony_ci default: 148562306a36Sopenharmony_ci dm->next_version = 0; 148662306a36Sopenharmony_ci version_req.is_last_attempt = 1; 148762306a36Sopenharmony_ci } 148862306a36Sopenharmony_ci 148962306a36Sopenharmony_ci ret = vmbus_sendpacket(dm->dev->channel, &version_req, 149062306a36Sopenharmony_ci sizeof(struct dm_version_request), 149162306a36Sopenharmony_ci (unsigned long)NULL, 149262306a36Sopenharmony_ci VM_PKT_DATA_INBAND, 0); 149362306a36Sopenharmony_ci 149462306a36Sopenharmony_ci if (ret) 149562306a36Sopenharmony_ci goto version_error; 149662306a36Sopenharmony_ci 149762306a36Sopenharmony_ci return; 149862306a36Sopenharmony_ci 149962306a36Sopenharmony_civersion_error: 150062306a36Sopenharmony_ci dm->state = DM_INIT_ERROR; 150162306a36Sopenharmony_ci complete(&dm->host_event); 150262306a36Sopenharmony_ci} 150362306a36Sopenharmony_ci 150462306a36Sopenharmony_cistatic void cap_resp(struct hv_dynmem_device *dm, 150562306a36Sopenharmony_ci struct dm_capabilities_resp_msg *cap_resp) 150662306a36Sopenharmony_ci{ 150762306a36Sopenharmony_ci if (!cap_resp->is_accepted) { 150862306a36Sopenharmony_ci pr_err("Capabilities not accepted by host\n"); 150962306a36Sopenharmony_ci dm->state = DM_INIT_ERROR; 151062306a36Sopenharmony_ci } 151162306a36Sopenharmony_ci complete(&dm->host_event); 151262306a36Sopenharmony_ci} 151362306a36Sopenharmony_ci 151462306a36Sopenharmony_cistatic void balloon_onchannelcallback(void *context) 151562306a36Sopenharmony_ci{ 151662306a36Sopenharmony_ci struct hv_device *dev = context; 151762306a36Sopenharmony_ci u32 recvlen; 151862306a36Sopenharmony_ci u64 requestid; 151962306a36Sopenharmony_ci struct dm_message *dm_msg; 152062306a36Sopenharmony_ci struct dm_header *dm_hdr; 152162306a36Sopenharmony_ci struct hv_dynmem_device *dm = hv_get_drvdata(dev); 152262306a36Sopenharmony_ci struct dm_balloon *bal_msg; 152362306a36Sopenharmony_ci struct dm_hot_add *ha_msg; 152462306a36Sopenharmony_ci union dm_mem_page_range *ha_pg_range; 152562306a36Sopenharmony_ci union dm_mem_page_range *ha_region; 152662306a36Sopenharmony_ci 152762306a36Sopenharmony_ci memset(recv_buffer, 0, sizeof(recv_buffer)); 152862306a36Sopenharmony_ci vmbus_recvpacket(dev->channel, recv_buffer, 152962306a36Sopenharmony_ci HV_HYP_PAGE_SIZE, &recvlen, &requestid); 153062306a36Sopenharmony_ci 153162306a36Sopenharmony_ci if (recvlen > 0) { 153262306a36Sopenharmony_ci dm_msg = (struct dm_message *)recv_buffer; 153362306a36Sopenharmony_ci dm_hdr = &dm_msg->hdr; 153462306a36Sopenharmony_ci 153562306a36Sopenharmony_ci switch (dm_hdr->type) { 153662306a36Sopenharmony_ci case DM_VERSION_RESPONSE: 153762306a36Sopenharmony_ci version_resp(dm, 153862306a36Sopenharmony_ci (struct dm_version_response *)dm_msg); 153962306a36Sopenharmony_ci break; 154062306a36Sopenharmony_ci 154162306a36Sopenharmony_ci case DM_CAPABILITIES_RESPONSE: 154262306a36Sopenharmony_ci cap_resp(dm, 154362306a36Sopenharmony_ci (struct dm_capabilities_resp_msg *)dm_msg); 154462306a36Sopenharmony_ci break; 154562306a36Sopenharmony_ci 154662306a36Sopenharmony_ci case DM_BALLOON_REQUEST: 154762306a36Sopenharmony_ci if (allow_hibernation) { 154862306a36Sopenharmony_ci pr_info("Ignore balloon-up request!\n"); 154962306a36Sopenharmony_ci break; 155062306a36Sopenharmony_ci } 155162306a36Sopenharmony_ci 155262306a36Sopenharmony_ci if (dm->state == DM_BALLOON_UP) 155362306a36Sopenharmony_ci pr_warn("Currently ballooning\n"); 155462306a36Sopenharmony_ci bal_msg = (struct dm_balloon *)recv_buffer; 155562306a36Sopenharmony_ci dm->state = DM_BALLOON_UP; 155662306a36Sopenharmony_ci dm_device.balloon_wrk.num_pages = bal_msg->num_pages; 155762306a36Sopenharmony_ci schedule_work(&dm_device.balloon_wrk.wrk); 155862306a36Sopenharmony_ci break; 155962306a36Sopenharmony_ci 156062306a36Sopenharmony_ci case DM_UNBALLOON_REQUEST: 156162306a36Sopenharmony_ci if (allow_hibernation) { 156262306a36Sopenharmony_ci pr_info("Ignore balloon-down request!\n"); 156362306a36Sopenharmony_ci break; 156462306a36Sopenharmony_ci } 156562306a36Sopenharmony_ci 156662306a36Sopenharmony_ci dm->state = DM_BALLOON_DOWN; 156762306a36Sopenharmony_ci balloon_down(dm, 156862306a36Sopenharmony_ci (struct dm_unballoon_request *)recv_buffer); 156962306a36Sopenharmony_ci break; 157062306a36Sopenharmony_ci 157162306a36Sopenharmony_ci case DM_MEM_HOT_ADD_REQUEST: 157262306a36Sopenharmony_ci if (dm->state == DM_HOT_ADD) 157362306a36Sopenharmony_ci pr_warn("Currently hot-adding\n"); 157462306a36Sopenharmony_ci dm->state = DM_HOT_ADD; 157562306a36Sopenharmony_ci ha_msg = (struct dm_hot_add *)recv_buffer; 157662306a36Sopenharmony_ci if (ha_msg->hdr.size == sizeof(struct dm_hot_add)) { 157762306a36Sopenharmony_ci /* 157862306a36Sopenharmony_ci * This is a normal hot-add request specifying 157962306a36Sopenharmony_ci * hot-add memory. 158062306a36Sopenharmony_ci */ 158162306a36Sopenharmony_ci dm->host_specified_ha_region = false; 158262306a36Sopenharmony_ci ha_pg_range = &ha_msg->range; 158362306a36Sopenharmony_ci dm->ha_wrk.ha_page_range = *ha_pg_range; 158462306a36Sopenharmony_ci dm->ha_wrk.ha_region_range.page_range = 0; 158562306a36Sopenharmony_ci } else { 158662306a36Sopenharmony_ci /* 158762306a36Sopenharmony_ci * Host is specifying that we first hot-add 158862306a36Sopenharmony_ci * a region and then partially populate this 158962306a36Sopenharmony_ci * region. 159062306a36Sopenharmony_ci */ 159162306a36Sopenharmony_ci dm->host_specified_ha_region = true; 159262306a36Sopenharmony_ci ha_pg_range = &ha_msg->range; 159362306a36Sopenharmony_ci ha_region = &ha_pg_range[1]; 159462306a36Sopenharmony_ci dm->ha_wrk.ha_page_range = *ha_pg_range; 159562306a36Sopenharmony_ci dm->ha_wrk.ha_region_range = *ha_region; 159662306a36Sopenharmony_ci } 159762306a36Sopenharmony_ci schedule_work(&dm_device.ha_wrk.wrk); 159862306a36Sopenharmony_ci break; 159962306a36Sopenharmony_ci 160062306a36Sopenharmony_ci case DM_INFO_MESSAGE: 160162306a36Sopenharmony_ci process_info(dm, (struct dm_info_msg *)dm_msg); 160262306a36Sopenharmony_ci break; 160362306a36Sopenharmony_ci 160462306a36Sopenharmony_ci default: 160562306a36Sopenharmony_ci pr_warn_ratelimited("Unhandled message: type: %d\n", dm_hdr->type); 160662306a36Sopenharmony_ci 160762306a36Sopenharmony_ci } 160862306a36Sopenharmony_ci } 160962306a36Sopenharmony_ci 161062306a36Sopenharmony_ci} 161162306a36Sopenharmony_ci 161262306a36Sopenharmony_ci#define HV_LARGE_REPORTING_ORDER 9 161362306a36Sopenharmony_ci#define HV_LARGE_REPORTING_LEN (HV_HYP_PAGE_SIZE << \ 161462306a36Sopenharmony_ci HV_LARGE_REPORTING_ORDER) 161562306a36Sopenharmony_cistatic int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info, 161662306a36Sopenharmony_ci struct scatterlist *sgl, unsigned int nents) 161762306a36Sopenharmony_ci{ 161862306a36Sopenharmony_ci unsigned long flags; 161962306a36Sopenharmony_ci struct hv_memory_hint *hint; 162062306a36Sopenharmony_ci int i, order; 162162306a36Sopenharmony_ci u64 status; 162262306a36Sopenharmony_ci struct scatterlist *sg; 162362306a36Sopenharmony_ci 162462306a36Sopenharmony_ci WARN_ON_ONCE(nents > HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES); 162562306a36Sopenharmony_ci WARN_ON_ONCE(sgl->length < (HV_HYP_PAGE_SIZE << page_reporting_order)); 162662306a36Sopenharmony_ci local_irq_save(flags); 162762306a36Sopenharmony_ci hint = *this_cpu_ptr(hyperv_pcpu_input_arg); 162862306a36Sopenharmony_ci if (!hint) { 162962306a36Sopenharmony_ci local_irq_restore(flags); 163062306a36Sopenharmony_ci return -ENOSPC; 163162306a36Sopenharmony_ci } 163262306a36Sopenharmony_ci 163362306a36Sopenharmony_ci hint->type = HV_EXT_MEMORY_HEAT_HINT_TYPE_COLD_DISCARD; 163462306a36Sopenharmony_ci hint->reserved = 0; 163562306a36Sopenharmony_ci for_each_sg(sgl, sg, nents, i) { 163662306a36Sopenharmony_ci union hv_gpa_page_range *range; 163762306a36Sopenharmony_ci 163862306a36Sopenharmony_ci range = &hint->ranges[i]; 163962306a36Sopenharmony_ci range->address_space = 0; 164062306a36Sopenharmony_ci order = get_order(sg->length); 164162306a36Sopenharmony_ci /* 164262306a36Sopenharmony_ci * Hyper-V expects the additional_pages field in the units 164362306a36Sopenharmony_ci * of one of these 3 sizes, 4Kbytes, 2Mbytes or 1Gbytes. 164462306a36Sopenharmony_ci * This is dictated by the values of the fields page.largesize 164562306a36Sopenharmony_ci * and page_size. 164662306a36Sopenharmony_ci * This code however, only uses 4Kbytes and 2Mbytes units 164762306a36Sopenharmony_ci * and not 1Gbytes unit. 164862306a36Sopenharmony_ci */ 164962306a36Sopenharmony_ci 165062306a36Sopenharmony_ci /* page reporting for pages 2MB or higher */ 165162306a36Sopenharmony_ci if (order >= HV_LARGE_REPORTING_ORDER ) { 165262306a36Sopenharmony_ci range->page.largepage = 1; 165362306a36Sopenharmony_ci range->page_size = HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB; 165462306a36Sopenharmony_ci range->base_large_pfn = page_to_hvpfn( 165562306a36Sopenharmony_ci sg_page(sg)) >> HV_LARGE_REPORTING_ORDER; 165662306a36Sopenharmony_ci range->page.additional_pages = 165762306a36Sopenharmony_ci (sg->length / HV_LARGE_REPORTING_LEN) - 1; 165862306a36Sopenharmony_ci } else { 165962306a36Sopenharmony_ci /* Page reporting for pages below 2MB */ 166062306a36Sopenharmony_ci range->page.basepfn = page_to_hvpfn(sg_page(sg)); 166162306a36Sopenharmony_ci range->page.largepage = false; 166262306a36Sopenharmony_ci range->page.additional_pages = 166362306a36Sopenharmony_ci (sg->length / HV_HYP_PAGE_SIZE) - 1; 166462306a36Sopenharmony_ci } 166562306a36Sopenharmony_ci 166662306a36Sopenharmony_ci } 166762306a36Sopenharmony_ci 166862306a36Sopenharmony_ci status = hv_do_rep_hypercall(HV_EXT_CALL_MEMORY_HEAT_HINT, nents, 0, 166962306a36Sopenharmony_ci hint, NULL); 167062306a36Sopenharmony_ci local_irq_restore(flags); 167162306a36Sopenharmony_ci if (!hv_result_success(status)) { 167262306a36Sopenharmony_ci 167362306a36Sopenharmony_ci pr_err("Cold memory discard hypercall failed with status %llx\n", 167462306a36Sopenharmony_ci status); 167562306a36Sopenharmony_ci if (hv_hypercall_multi_failure > 0) 167662306a36Sopenharmony_ci hv_hypercall_multi_failure++; 167762306a36Sopenharmony_ci 167862306a36Sopenharmony_ci if (hv_result(status) == HV_STATUS_INVALID_PARAMETER) { 167962306a36Sopenharmony_ci pr_err("Underlying Hyper-V does not support order less than 9. Hypercall failed\n"); 168062306a36Sopenharmony_ci pr_err("Defaulting to page_reporting_order %d\n", 168162306a36Sopenharmony_ci pageblock_order); 168262306a36Sopenharmony_ci page_reporting_order = pageblock_order; 168362306a36Sopenharmony_ci hv_hypercall_multi_failure++; 168462306a36Sopenharmony_ci return -EINVAL; 168562306a36Sopenharmony_ci } 168662306a36Sopenharmony_ci 168762306a36Sopenharmony_ci return -EINVAL; 168862306a36Sopenharmony_ci } 168962306a36Sopenharmony_ci 169062306a36Sopenharmony_ci return 0; 169162306a36Sopenharmony_ci} 169262306a36Sopenharmony_ci 169362306a36Sopenharmony_cistatic void enable_page_reporting(void) 169462306a36Sopenharmony_ci{ 169562306a36Sopenharmony_ci int ret; 169662306a36Sopenharmony_ci 169762306a36Sopenharmony_ci if (!hv_query_ext_cap(HV_EXT_CAPABILITY_MEMORY_COLD_DISCARD_HINT)) { 169862306a36Sopenharmony_ci pr_debug("Cold memory discard hint not supported by Hyper-V\n"); 169962306a36Sopenharmony_ci return; 170062306a36Sopenharmony_ci } 170162306a36Sopenharmony_ci 170262306a36Sopenharmony_ci BUILD_BUG_ON(PAGE_REPORTING_CAPACITY > HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES); 170362306a36Sopenharmony_ci dm_device.pr_dev_info.report = hv_free_page_report; 170462306a36Sopenharmony_ci /* 170562306a36Sopenharmony_ci * We let the page_reporting_order parameter decide the order 170662306a36Sopenharmony_ci * in the page_reporting code 170762306a36Sopenharmony_ci */ 170862306a36Sopenharmony_ci dm_device.pr_dev_info.order = 0; 170962306a36Sopenharmony_ci ret = page_reporting_register(&dm_device.pr_dev_info); 171062306a36Sopenharmony_ci if (ret < 0) { 171162306a36Sopenharmony_ci dm_device.pr_dev_info.report = NULL; 171262306a36Sopenharmony_ci pr_err("Failed to enable cold memory discard: %d\n", ret); 171362306a36Sopenharmony_ci } else { 171462306a36Sopenharmony_ci pr_info("Cold memory discard hint enabled with order %d\n", 171562306a36Sopenharmony_ci page_reporting_order); 171662306a36Sopenharmony_ci } 171762306a36Sopenharmony_ci} 171862306a36Sopenharmony_ci 171962306a36Sopenharmony_cistatic void disable_page_reporting(void) 172062306a36Sopenharmony_ci{ 172162306a36Sopenharmony_ci if (dm_device.pr_dev_info.report) { 172262306a36Sopenharmony_ci page_reporting_unregister(&dm_device.pr_dev_info); 172362306a36Sopenharmony_ci dm_device.pr_dev_info.report = NULL; 172462306a36Sopenharmony_ci } 172562306a36Sopenharmony_ci} 172662306a36Sopenharmony_ci 172762306a36Sopenharmony_cistatic int ballooning_enabled(void) 172862306a36Sopenharmony_ci{ 172962306a36Sopenharmony_ci /* 173062306a36Sopenharmony_ci * Disable ballooning if the page size is not 4k (HV_HYP_PAGE_SIZE), 173162306a36Sopenharmony_ci * since currently it's unclear to us whether an unballoon request can 173262306a36Sopenharmony_ci * make sure all page ranges are guest page size aligned. 173362306a36Sopenharmony_ci */ 173462306a36Sopenharmony_ci if (PAGE_SIZE != HV_HYP_PAGE_SIZE) { 173562306a36Sopenharmony_ci pr_info("Ballooning disabled because page size is not 4096 bytes\n"); 173662306a36Sopenharmony_ci return 0; 173762306a36Sopenharmony_ci } 173862306a36Sopenharmony_ci 173962306a36Sopenharmony_ci return 1; 174062306a36Sopenharmony_ci} 174162306a36Sopenharmony_ci 174262306a36Sopenharmony_cistatic int hot_add_enabled(void) 174362306a36Sopenharmony_ci{ 174462306a36Sopenharmony_ci /* 174562306a36Sopenharmony_ci * Disable hot add on ARM64, because we currently rely on 174662306a36Sopenharmony_ci * memory_add_physaddr_to_nid() to get a node id of a hot add range, 174762306a36Sopenharmony_ci * however ARM64's memory_add_physaddr_to_nid() always return 0 and 174862306a36Sopenharmony_ci * DM_MEM_HOT_ADD_REQUEST doesn't have the NUMA node information for 174962306a36Sopenharmony_ci * add_memory(). 175062306a36Sopenharmony_ci */ 175162306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_ARM64)) { 175262306a36Sopenharmony_ci pr_info("Memory hot add disabled on ARM64\n"); 175362306a36Sopenharmony_ci return 0; 175462306a36Sopenharmony_ci } 175562306a36Sopenharmony_ci 175662306a36Sopenharmony_ci return 1; 175762306a36Sopenharmony_ci} 175862306a36Sopenharmony_ci 175962306a36Sopenharmony_cistatic int balloon_connect_vsp(struct hv_device *dev) 176062306a36Sopenharmony_ci{ 176162306a36Sopenharmony_ci struct dm_version_request version_req; 176262306a36Sopenharmony_ci struct dm_capabilities cap_msg; 176362306a36Sopenharmony_ci unsigned long t; 176462306a36Sopenharmony_ci int ret; 176562306a36Sopenharmony_ci 176662306a36Sopenharmony_ci /* 176762306a36Sopenharmony_ci * max_pkt_size should be large enough for one vmbus packet header plus 176862306a36Sopenharmony_ci * our receive buffer size. Hyper-V sends messages up to 176962306a36Sopenharmony_ci * HV_HYP_PAGE_SIZE bytes long on balloon channel. 177062306a36Sopenharmony_ci */ 177162306a36Sopenharmony_ci dev->channel->max_pkt_size = HV_HYP_PAGE_SIZE * 2; 177262306a36Sopenharmony_ci 177362306a36Sopenharmony_ci ret = vmbus_open(dev->channel, dm_ring_size, dm_ring_size, NULL, 0, 177462306a36Sopenharmony_ci balloon_onchannelcallback, dev); 177562306a36Sopenharmony_ci if (ret) 177662306a36Sopenharmony_ci return ret; 177762306a36Sopenharmony_ci 177862306a36Sopenharmony_ci /* 177962306a36Sopenharmony_ci * Initiate the hand shake with the host and negotiate 178062306a36Sopenharmony_ci * a version that the host can support. We start with the 178162306a36Sopenharmony_ci * highest version number and go down if the host cannot 178262306a36Sopenharmony_ci * support it. 178362306a36Sopenharmony_ci */ 178462306a36Sopenharmony_ci memset(&version_req, 0, sizeof(struct dm_version_request)); 178562306a36Sopenharmony_ci version_req.hdr.type = DM_VERSION_REQUEST; 178662306a36Sopenharmony_ci version_req.hdr.size = sizeof(struct dm_version_request); 178762306a36Sopenharmony_ci version_req.hdr.trans_id = atomic_inc_return(&trans_id); 178862306a36Sopenharmony_ci version_req.version.version = DYNMEM_PROTOCOL_VERSION_WIN10; 178962306a36Sopenharmony_ci version_req.is_last_attempt = 0; 179062306a36Sopenharmony_ci dm_device.version = version_req.version.version; 179162306a36Sopenharmony_ci 179262306a36Sopenharmony_ci ret = vmbus_sendpacket(dev->channel, &version_req, 179362306a36Sopenharmony_ci sizeof(struct dm_version_request), 179462306a36Sopenharmony_ci (unsigned long)NULL, VM_PKT_DATA_INBAND, 0); 179562306a36Sopenharmony_ci if (ret) 179662306a36Sopenharmony_ci goto out; 179762306a36Sopenharmony_ci 179862306a36Sopenharmony_ci t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ); 179962306a36Sopenharmony_ci if (t == 0) { 180062306a36Sopenharmony_ci ret = -ETIMEDOUT; 180162306a36Sopenharmony_ci goto out; 180262306a36Sopenharmony_ci } 180362306a36Sopenharmony_ci 180462306a36Sopenharmony_ci /* 180562306a36Sopenharmony_ci * If we could not negotiate a compatible version with the host 180662306a36Sopenharmony_ci * fail the probe function. 180762306a36Sopenharmony_ci */ 180862306a36Sopenharmony_ci if (dm_device.state == DM_INIT_ERROR) { 180962306a36Sopenharmony_ci ret = -EPROTO; 181062306a36Sopenharmony_ci goto out; 181162306a36Sopenharmony_ci } 181262306a36Sopenharmony_ci 181362306a36Sopenharmony_ci pr_info("Using Dynamic Memory protocol version %u.%u\n", 181462306a36Sopenharmony_ci DYNMEM_MAJOR_VERSION(dm_device.version), 181562306a36Sopenharmony_ci DYNMEM_MINOR_VERSION(dm_device.version)); 181662306a36Sopenharmony_ci 181762306a36Sopenharmony_ci /* 181862306a36Sopenharmony_ci * Now submit our capabilities to the host. 181962306a36Sopenharmony_ci */ 182062306a36Sopenharmony_ci memset(&cap_msg, 0, sizeof(struct dm_capabilities)); 182162306a36Sopenharmony_ci cap_msg.hdr.type = DM_CAPABILITIES_REPORT; 182262306a36Sopenharmony_ci cap_msg.hdr.size = sizeof(struct dm_capabilities); 182362306a36Sopenharmony_ci cap_msg.hdr.trans_id = atomic_inc_return(&trans_id); 182462306a36Sopenharmony_ci 182562306a36Sopenharmony_ci /* 182662306a36Sopenharmony_ci * When hibernation (i.e. virtual ACPI S4 state) is enabled, the host 182762306a36Sopenharmony_ci * currently still requires the bits to be set, so we have to add code 182862306a36Sopenharmony_ci * to fail the host's hot-add and balloon up/down requests, if any. 182962306a36Sopenharmony_ci */ 183062306a36Sopenharmony_ci cap_msg.caps.cap_bits.balloon = ballooning_enabled(); 183162306a36Sopenharmony_ci cap_msg.caps.cap_bits.hot_add = hot_add_enabled(); 183262306a36Sopenharmony_ci 183362306a36Sopenharmony_ci /* 183462306a36Sopenharmony_ci * Specify our alignment requirements as it relates 183562306a36Sopenharmony_ci * memory hot-add. Specify 128MB alignment. 183662306a36Sopenharmony_ci */ 183762306a36Sopenharmony_ci cap_msg.caps.cap_bits.hot_add_alignment = 7; 183862306a36Sopenharmony_ci 183962306a36Sopenharmony_ci /* 184062306a36Sopenharmony_ci * Currently the host does not use these 184162306a36Sopenharmony_ci * values and we set them to what is done in the 184262306a36Sopenharmony_ci * Windows driver. 184362306a36Sopenharmony_ci */ 184462306a36Sopenharmony_ci cap_msg.min_page_cnt = 0; 184562306a36Sopenharmony_ci cap_msg.max_page_number = -1; 184662306a36Sopenharmony_ci 184762306a36Sopenharmony_ci ret = vmbus_sendpacket(dev->channel, &cap_msg, 184862306a36Sopenharmony_ci sizeof(struct dm_capabilities), 184962306a36Sopenharmony_ci (unsigned long)NULL, VM_PKT_DATA_INBAND, 0); 185062306a36Sopenharmony_ci if (ret) 185162306a36Sopenharmony_ci goto out; 185262306a36Sopenharmony_ci 185362306a36Sopenharmony_ci t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ); 185462306a36Sopenharmony_ci if (t == 0) { 185562306a36Sopenharmony_ci ret = -ETIMEDOUT; 185662306a36Sopenharmony_ci goto out; 185762306a36Sopenharmony_ci } 185862306a36Sopenharmony_ci 185962306a36Sopenharmony_ci /* 186062306a36Sopenharmony_ci * If the host does not like our capabilities, 186162306a36Sopenharmony_ci * fail the probe function. 186262306a36Sopenharmony_ci */ 186362306a36Sopenharmony_ci if (dm_device.state == DM_INIT_ERROR) { 186462306a36Sopenharmony_ci ret = -EPROTO; 186562306a36Sopenharmony_ci goto out; 186662306a36Sopenharmony_ci } 186762306a36Sopenharmony_ci 186862306a36Sopenharmony_ci return 0; 186962306a36Sopenharmony_ciout: 187062306a36Sopenharmony_ci vmbus_close(dev->channel); 187162306a36Sopenharmony_ci return ret; 187262306a36Sopenharmony_ci} 187362306a36Sopenharmony_ci 187462306a36Sopenharmony_ci/* 187562306a36Sopenharmony_ci * DEBUGFS Interface 187662306a36Sopenharmony_ci */ 187762306a36Sopenharmony_ci#ifdef CONFIG_DEBUG_FS 187862306a36Sopenharmony_ci 187962306a36Sopenharmony_ci/** 188062306a36Sopenharmony_ci * hv_balloon_debug_show - shows statistics of balloon operations. 188162306a36Sopenharmony_ci * @f: pointer to the &struct seq_file. 188262306a36Sopenharmony_ci * @offset: ignored. 188362306a36Sopenharmony_ci * 188462306a36Sopenharmony_ci * Provides the statistics that can be accessed in hv-balloon in the debugfs. 188562306a36Sopenharmony_ci * 188662306a36Sopenharmony_ci * Return: zero on success or an error code. 188762306a36Sopenharmony_ci */ 188862306a36Sopenharmony_cistatic int hv_balloon_debug_show(struct seq_file *f, void *offset) 188962306a36Sopenharmony_ci{ 189062306a36Sopenharmony_ci struct hv_dynmem_device *dm = f->private; 189162306a36Sopenharmony_ci char *sname; 189262306a36Sopenharmony_ci 189362306a36Sopenharmony_ci seq_printf(f, "%-22s: %u.%u\n", "host_version", 189462306a36Sopenharmony_ci DYNMEM_MAJOR_VERSION(dm->version), 189562306a36Sopenharmony_ci DYNMEM_MINOR_VERSION(dm->version)); 189662306a36Sopenharmony_ci 189762306a36Sopenharmony_ci seq_printf(f, "%-22s:", "capabilities"); 189862306a36Sopenharmony_ci if (ballooning_enabled()) 189962306a36Sopenharmony_ci seq_puts(f, " enabled"); 190062306a36Sopenharmony_ci 190162306a36Sopenharmony_ci if (hot_add_enabled()) 190262306a36Sopenharmony_ci seq_puts(f, " hot_add"); 190362306a36Sopenharmony_ci 190462306a36Sopenharmony_ci seq_puts(f, "\n"); 190562306a36Sopenharmony_ci 190662306a36Sopenharmony_ci seq_printf(f, "%-22s: %u", "state", dm->state); 190762306a36Sopenharmony_ci switch (dm->state) { 190862306a36Sopenharmony_ci case DM_INITIALIZING: 190962306a36Sopenharmony_ci sname = "Initializing"; 191062306a36Sopenharmony_ci break; 191162306a36Sopenharmony_ci case DM_INITIALIZED: 191262306a36Sopenharmony_ci sname = "Initialized"; 191362306a36Sopenharmony_ci break; 191462306a36Sopenharmony_ci case DM_BALLOON_UP: 191562306a36Sopenharmony_ci sname = "Balloon Up"; 191662306a36Sopenharmony_ci break; 191762306a36Sopenharmony_ci case DM_BALLOON_DOWN: 191862306a36Sopenharmony_ci sname = "Balloon Down"; 191962306a36Sopenharmony_ci break; 192062306a36Sopenharmony_ci case DM_HOT_ADD: 192162306a36Sopenharmony_ci sname = "Hot Add"; 192262306a36Sopenharmony_ci break; 192362306a36Sopenharmony_ci case DM_INIT_ERROR: 192462306a36Sopenharmony_ci sname = "Error"; 192562306a36Sopenharmony_ci break; 192662306a36Sopenharmony_ci default: 192762306a36Sopenharmony_ci sname = "Unknown"; 192862306a36Sopenharmony_ci } 192962306a36Sopenharmony_ci seq_printf(f, " (%s)\n", sname); 193062306a36Sopenharmony_ci 193162306a36Sopenharmony_ci /* HV Page Size */ 193262306a36Sopenharmony_ci seq_printf(f, "%-22s: %ld\n", "page_size", HV_HYP_PAGE_SIZE); 193362306a36Sopenharmony_ci 193462306a36Sopenharmony_ci /* Pages added with hot_add */ 193562306a36Sopenharmony_ci seq_printf(f, "%-22s: %u\n", "pages_added", dm->num_pages_added); 193662306a36Sopenharmony_ci 193762306a36Sopenharmony_ci /* pages that are "onlined"/used from pages_added */ 193862306a36Sopenharmony_ci seq_printf(f, "%-22s: %u\n", "pages_onlined", dm->num_pages_onlined); 193962306a36Sopenharmony_ci 194062306a36Sopenharmony_ci /* pages we have given back to host */ 194162306a36Sopenharmony_ci seq_printf(f, "%-22s: %u\n", "pages_ballooned", dm->num_pages_ballooned); 194262306a36Sopenharmony_ci 194362306a36Sopenharmony_ci seq_printf(f, "%-22s: %lu\n", "total_pages_committed", 194462306a36Sopenharmony_ci get_pages_committed(dm)); 194562306a36Sopenharmony_ci 194662306a36Sopenharmony_ci seq_printf(f, "%-22s: %llu\n", "max_dynamic_page_count", 194762306a36Sopenharmony_ci dm->max_dynamic_page_count); 194862306a36Sopenharmony_ci 194962306a36Sopenharmony_ci return 0; 195062306a36Sopenharmony_ci} 195162306a36Sopenharmony_ci 195262306a36Sopenharmony_ciDEFINE_SHOW_ATTRIBUTE(hv_balloon_debug); 195362306a36Sopenharmony_ci 195462306a36Sopenharmony_cistatic void hv_balloon_debugfs_init(struct hv_dynmem_device *b) 195562306a36Sopenharmony_ci{ 195662306a36Sopenharmony_ci debugfs_create_file("hv-balloon", 0444, NULL, b, 195762306a36Sopenharmony_ci &hv_balloon_debug_fops); 195862306a36Sopenharmony_ci} 195962306a36Sopenharmony_ci 196062306a36Sopenharmony_cistatic void hv_balloon_debugfs_exit(struct hv_dynmem_device *b) 196162306a36Sopenharmony_ci{ 196262306a36Sopenharmony_ci debugfs_lookup_and_remove("hv-balloon", NULL); 196362306a36Sopenharmony_ci} 196462306a36Sopenharmony_ci 196562306a36Sopenharmony_ci#else 196662306a36Sopenharmony_ci 196762306a36Sopenharmony_cistatic inline void hv_balloon_debugfs_init(struct hv_dynmem_device *b) 196862306a36Sopenharmony_ci{ 196962306a36Sopenharmony_ci} 197062306a36Sopenharmony_ci 197162306a36Sopenharmony_cistatic inline void hv_balloon_debugfs_exit(struct hv_dynmem_device *b) 197262306a36Sopenharmony_ci{ 197362306a36Sopenharmony_ci} 197462306a36Sopenharmony_ci 197562306a36Sopenharmony_ci#endif /* CONFIG_DEBUG_FS */ 197662306a36Sopenharmony_ci 197762306a36Sopenharmony_cistatic int balloon_probe(struct hv_device *dev, 197862306a36Sopenharmony_ci const struct hv_vmbus_device_id *dev_id) 197962306a36Sopenharmony_ci{ 198062306a36Sopenharmony_ci int ret; 198162306a36Sopenharmony_ci 198262306a36Sopenharmony_ci allow_hibernation = hv_is_hibernation_supported(); 198362306a36Sopenharmony_ci if (allow_hibernation) 198462306a36Sopenharmony_ci hot_add = false; 198562306a36Sopenharmony_ci 198662306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTPLUG 198762306a36Sopenharmony_ci do_hot_add = hot_add; 198862306a36Sopenharmony_ci#else 198962306a36Sopenharmony_ci do_hot_add = false; 199062306a36Sopenharmony_ci#endif 199162306a36Sopenharmony_ci dm_device.dev = dev; 199262306a36Sopenharmony_ci dm_device.state = DM_INITIALIZING; 199362306a36Sopenharmony_ci dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN8; 199462306a36Sopenharmony_ci init_completion(&dm_device.host_event); 199562306a36Sopenharmony_ci init_completion(&dm_device.config_event); 199662306a36Sopenharmony_ci INIT_LIST_HEAD(&dm_device.ha_region_list); 199762306a36Sopenharmony_ci spin_lock_init(&dm_device.ha_lock); 199862306a36Sopenharmony_ci INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up); 199962306a36Sopenharmony_ci INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req); 200062306a36Sopenharmony_ci dm_device.host_specified_ha_region = false; 200162306a36Sopenharmony_ci 200262306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTPLUG 200362306a36Sopenharmony_ci set_online_page_callback(&hv_online_page); 200462306a36Sopenharmony_ci init_completion(&dm_device.ol_waitevent); 200562306a36Sopenharmony_ci register_memory_notifier(&hv_memory_nb); 200662306a36Sopenharmony_ci#endif 200762306a36Sopenharmony_ci 200862306a36Sopenharmony_ci hv_set_drvdata(dev, &dm_device); 200962306a36Sopenharmony_ci 201062306a36Sopenharmony_ci ret = balloon_connect_vsp(dev); 201162306a36Sopenharmony_ci if (ret != 0) 201262306a36Sopenharmony_ci goto connect_error; 201362306a36Sopenharmony_ci 201462306a36Sopenharmony_ci enable_page_reporting(); 201562306a36Sopenharmony_ci dm_device.state = DM_INITIALIZED; 201662306a36Sopenharmony_ci 201762306a36Sopenharmony_ci dm_device.thread = 201862306a36Sopenharmony_ci kthread_run(dm_thread_func, &dm_device, "hv_balloon"); 201962306a36Sopenharmony_ci if (IS_ERR(dm_device.thread)) { 202062306a36Sopenharmony_ci ret = PTR_ERR(dm_device.thread); 202162306a36Sopenharmony_ci goto probe_error; 202262306a36Sopenharmony_ci } 202362306a36Sopenharmony_ci 202462306a36Sopenharmony_ci hv_balloon_debugfs_init(&dm_device); 202562306a36Sopenharmony_ci 202662306a36Sopenharmony_ci return 0; 202762306a36Sopenharmony_ci 202862306a36Sopenharmony_ciprobe_error: 202962306a36Sopenharmony_ci dm_device.state = DM_INIT_ERROR; 203062306a36Sopenharmony_ci dm_device.thread = NULL; 203162306a36Sopenharmony_ci disable_page_reporting(); 203262306a36Sopenharmony_ci vmbus_close(dev->channel); 203362306a36Sopenharmony_ciconnect_error: 203462306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTPLUG 203562306a36Sopenharmony_ci unregister_memory_notifier(&hv_memory_nb); 203662306a36Sopenharmony_ci restore_online_page_callback(&hv_online_page); 203762306a36Sopenharmony_ci#endif 203862306a36Sopenharmony_ci return ret; 203962306a36Sopenharmony_ci} 204062306a36Sopenharmony_ci 204162306a36Sopenharmony_cistatic void balloon_remove(struct hv_device *dev) 204262306a36Sopenharmony_ci{ 204362306a36Sopenharmony_ci struct hv_dynmem_device *dm = hv_get_drvdata(dev); 204462306a36Sopenharmony_ci struct hv_hotadd_state *has, *tmp; 204562306a36Sopenharmony_ci struct hv_hotadd_gap *gap, *tmp_gap; 204662306a36Sopenharmony_ci 204762306a36Sopenharmony_ci if (dm->num_pages_ballooned != 0) 204862306a36Sopenharmony_ci pr_warn("Ballooned pages: %d\n", dm->num_pages_ballooned); 204962306a36Sopenharmony_ci 205062306a36Sopenharmony_ci hv_balloon_debugfs_exit(dm); 205162306a36Sopenharmony_ci 205262306a36Sopenharmony_ci cancel_work_sync(&dm->balloon_wrk.wrk); 205362306a36Sopenharmony_ci cancel_work_sync(&dm->ha_wrk.wrk); 205462306a36Sopenharmony_ci 205562306a36Sopenharmony_ci kthread_stop(dm->thread); 205662306a36Sopenharmony_ci 205762306a36Sopenharmony_ci /* 205862306a36Sopenharmony_ci * This is to handle the case when balloon_resume() 205962306a36Sopenharmony_ci * call has failed and some cleanup has been done as 206062306a36Sopenharmony_ci * a part of the error handling. 206162306a36Sopenharmony_ci */ 206262306a36Sopenharmony_ci if (dm_device.state != DM_INIT_ERROR) { 206362306a36Sopenharmony_ci disable_page_reporting(); 206462306a36Sopenharmony_ci vmbus_close(dev->channel); 206562306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTPLUG 206662306a36Sopenharmony_ci unregister_memory_notifier(&hv_memory_nb); 206762306a36Sopenharmony_ci restore_online_page_callback(&hv_online_page); 206862306a36Sopenharmony_ci#endif 206962306a36Sopenharmony_ci } 207062306a36Sopenharmony_ci 207162306a36Sopenharmony_ci guard(spinlock_irqsave)(&dm_device.ha_lock); 207262306a36Sopenharmony_ci list_for_each_entry_safe(has, tmp, &dm->ha_region_list, list) { 207362306a36Sopenharmony_ci list_for_each_entry_safe(gap, tmp_gap, &has->gap_list, list) { 207462306a36Sopenharmony_ci list_del(&gap->list); 207562306a36Sopenharmony_ci kfree(gap); 207662306a36Sopenharmony_ci } 207762306a36Sopenharmony_ci list_del(&has->list); 207862306a36Sopenharmony_ci kfree(has); 207962306a36Sopenharmony_ci } 208062306a36Sopenharmony_ci} 208162306a36Sopenharmony_ci 208262306a36Sopenharmony_cistatic int balloon_suspend(struct hv_device *hv_dev) 208362306a36Sopenharmony_ci{ 208462306a36Sopenharmony_ci struct hv_dynmem_device *dm = hv_get_drvdata(hv_dev); 208562306a36Sopenharmony_ci 208662306a36Sopenharmony_ci tasklet_disable(&hv_dev->channel->callback_event); 208762306a36Sopenharmony_ci 208862306a36Sopenharmony_ci cancel_work_sync(&dm->balloon_wrk.wrk); 208962306a36Sopenharmony_ci cancel_work_sync(&dm->ha_wrk.wrk); 209062306a36Sopenharmony_ci 209162306a36Sopenharmony_ci if (dm->thread) { 209262306a36Sopenharmony_ci kthread_stop(dm->thread); 209362306a36Sopenharmony_ci dm->thread = NULL; 209462306a36Sopenharmony_ci vmbus_close(hv_dev->channel); 209562306a36Sopenharmony_ci } 209662306a36Sopenharmony_ci 209762306a36Sopenharmony_ci tasklet_enable(&hv_dev->channel->callback_event); 209862306a36Sopenharmony_ci 209962306a36Sopenharmony_ci return 0; 210062306a36Sopenharmony_ci 210162306a36Sopenharmony_ci} 210262306a36Sopenharmony_ci 210362306a36Sopenharmony_cistatic int balloon_resume(struct hv_device *dev) 210462306a36Sopenharmony_ci{ 210562306a36Sopenharmony_ci int ret; 210662306a36Sopenharmony_ci 210762306a36Sopenharmony_ci dm_device.state = DM_INITIALIZING; 210862306a36Sopenharmony_ci 210962306a36Sopenharmony_ci ret = balloon_connect_vsp(dev); 211062306a36Sopenharmony_ci 211162306a36Sopenharmony_ci if (ret != 0) 211262306a36Sopenharmony_ci goto out; 211362306a36Sopenharmony_ci 211462306a36Sopenharmony_ci dm_device.thread = 211562306a36Sopenharmony_ci kthread_run(dm_thread_func, &dm_device, "hv_balloon"); 211662306a36Sopenharmony_ci if (IS_ERR(dm_device.thread)) { 211762306a36Sopenharmony_ci ret = PTR_ERR(dm_device.thread); 211862306a36Sopenharmony_ci dm_device.thread = NULL; 211962306a36Sopenharmony_ci goto close_channel; 212062306a36Sopenharmony_ci } 212162306a36Sopenharmony_ci 212262306a36Sopenharmony_ci dm_device.state = DM_INITIALIZED; 212362306a36Sopenharmony_ci return 0; 212462306a36Sopenharmony_ciclose_channel: 212562306a36Sopenharmony_ci vmbus_close(dev->channel); 212662306a36Sopenharmony_ciout: 212762306a36Sopenharmony_ci dm_device.state = DM_INIT_ERROR; 212862306a36Sopenharmony_ci disable_page_reporting(); 212962306a36Sopenharmony_ci#ifdef CONFIG_MEMORY_HOTPLUG 213062306a36Sopenharmony_ci unregister_memory_notifier(&hv_memory_nb); 213162306a36Sopenharmony_ci restore_online_page_callback(&hv_online_page); 213262306a36Sopenharmony_ci#endif 213362306a36Sopenharmony_ci return ret; 213462306a36Sopenharmony_ci} 213562306a36Sopenharmony_ci 213662306a36Sopenharmony_cistatic const struct hv_vmbus_device_id id_table[] = { 213762306a36Sopenharmony_ci /* Dynamic Memory Class ID */ 213862306a36Sopenharmony_ci /* 525074DC-8985-46e2-8057-A307DC18A502 */ 213962306a36Sopenharmony_ci { HV_DM_GUID, }, 214062306a36Sopenharmony_ci { }, 214162306a36Sopenharmony_ci}; 214262306a36Sopenharmony_ci 214362306a36Sopenharmony_ciMODULE_DEVICE_TABLE(vmbus, id_table); 214462306a36Sopenharmony_ci 214562306a36Sopenharmony_cistatic struct hv_driver balloon_drv = { 214662306a36Sopenharmony_ci .name = "hv_balloon", 214762306a36Sopenharmony_ci .id_table = id_table, 214862306a36Sopenharmony_ci .probe = balloon_probe, 214962306a36Sopenharmony_ci .remove = balloon_remove, 215062306a36Sopenharmony_ci .suspend = balloon_suspend, 215162306a36Sopenharmony_ci .resume = balloon_resume, 215262306a36Sopenharmony_ci .driver = { 215362306a36Sopenharmony_ci .probe_type = PROBE_PREFER_ASYNCHRONOUS, 215462306a36Sopenharmony_ci }, 215562306a36Sopenharmony_ci}; 215662306a36Sopenharmony_ci 215762306a36Sopenharmony_cistatic int __init init_balloon_drv(void) 215862306a36Sopenharmony_ci{ 215962306a36Sopenharmony_ci 216062306a36Sopenharmony_ci return vmbus_driver_register(&balloon_drv); 216162306a36Sopenharmony_ci} 216262306a36Sopenharmony_ci 216362306a36Sopenharmony_cimodule_init(init_balloon_drv); 216462306a36Sopenharmony_ci 216562306a36Sopenharmony_ciMODULE_DESCRIPTION("Hyper-V Balloon"); 216662306a36Sopenharmony_ciMODULE_LICENSE("GPL"); 2167