162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (c) Microsoft Corporation.
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Author:
662306a36Sopenharmony_ci *   Jake Oshins <jakeo@microsoft.com>
762306a36Sopenharmony_ci *
862306a36Sopenharmony_ci * This driver acts as a paravirtual front-end for PCI Express root buses.
962306a36Sopenharmony_ci * When a PCI Express function (either an entire device or an SR-IOV
1062306a36Sopenharmony_ci * Virtual Function) is being passed through to the VM, this driver exposes
1162306a36Sopenharmony_ci * a new bus to the guest VM.  This is modeled as a root PCI bus because
1262306a36Sopenharmony_ci * no bridges are being exposed to the VM.  In fact, with a "Generation 2"
1362306a36Sopenharmony_ci * VM within Hyper-V, there may seem to be no PCI bus at all in the VM
 * until a device has been exposed using this driver.
1562306a36Sopenharmony_ci *
1662306a36Sopenharmony_ci * Each root PCI bus has its own PCI domain, which is called "Segment" in
1762306a36Sopenharmony_ci * the PCI Firmware Specifications.  Thus while each device passed through
1862306a36Sopenharmony_ci * to the VM using this front-end will appear at "device 0", the domain will
1962306a36Sopenharmony_ci * be unique.  Typically, each bus will have one PCI function on it, though
2062306a36Sopenharmony_ci * this driver does support more than one.
2162306a36Sopenharmony_ci *
2262306a36Sopenharmony_ci * In order to map the interrupts from the device through to the guest VM,
2362306a36Sopenharmony_ci * this driver also implements an IRQ Domain, which handles interrupts (either
2462306a36Sopenharmony_ci * MSI or MSI-X) associated with the functions on the bus.  As interrupts are
2562306a36Sopenharmony_ci * set up, torn down, or reaffined, this driver communicates with the
2662306a36Sopenharmony_ci * underlying hypervisor to adjust the mappings in the I/O MMU so that each
2762306a36Sopenharmony_ci * interrupt will be delivered to the correct virtual processor at the right
2862306a36Sopenharmony_ci * vector.  This driver does not support level-triggered (line-based)
2962306a36Sopenharmony_ci * interrupts, and will report that the Interrupt Line register in the
3062306a36Sopenharmony_ci * function's configuration space is zero.
3162306a36Sopenharmony_ci *
3262306a36Sopenharmony_ci * The rest of this driver mostly maps PCI concepts onto underlying Hyper-V
3362306a36Sopenharmony_ci * facilities.  For instance, the configuration space of a function exposed
3462306a36Sopenharmony_ci * by Hyper-V is mapped into a single page of memory space, and the
3562306a36Sopenharmony_ci * read and write handlers for config space must be aware of this mechanism.
3662306a36Sopenharmony_ci * Similarly, device setup and teardown involves messages sent to and from
3762306a36Sopenharmony_ci * the PCI back-end driver in Hyper-V.
3862306a36Sopenharmony_ci */
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_ci#include <linux/kernel.h>
4162306a36Sopenharmony_ci#include <linux/module.h>
4262306a36Sopenharmony_ci#include <linux/pci.h>
4362306a36Sopenharmony_ci#include <linux/pci-ecam.h>
4462306a36Sopenharmony_ci#include <linux/delay.h>
4562306a36Sopenharmony_ci#include <linux/semaphore.h>
4662306a36Sopenharmony_ci#include <linux/irq.h>
4762306a36Sopenharmony_ci#include <linux/msi.h>
4862306a36Sopenharmony_ci#include <linux/hyperv.h>
4962306a36Sopenharmony_ci#include <linux/refcount.h>
5062306a36Sopenharmony_ci#include <linux/irqdomain.h>
5162306a36Sopenharmony_ci#include <linux/acpi.h>
5262306a36Sopenharmony_ci#include <linux/sizes.h>
5362306a36Sopenharmony_ci#include <asm/mshyperv.h>
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_ci/*
5662306a36Sopenharmony_ci * Protocol versions. The low word is the minor version, the high word the
5762306a36Sopenharmony_ci * major version.
5862306a36Sopenharmony_ci */
5962306a36Sopenharmony_ci
/*
 * Pack and unpack a 32-bit protocol version: major number in the high
 * 16-bit word, minor number in the low 16-bit word.
 *
 * The operands are converted to u32 before shifting so the shift is never
 * performed on a signed int, and the minor number is extracted with a full
 * 16-bit mask so that minor versions above 0xff are not truncated.
 */
#define PCI_MAKE_VERSION(major, minor) \
	((u32)(((u32)(major) << 16) | ((u32)(minor) & 0xffff)))
#define PCI_MAJOR_VERSION(version) ((u32)(version) >> 16)
#define PCI_MINOR_VERSION(version) ((u32)(version) & 0xffff)
6362306a36Sopenharmony_ci
6462306a36Sopenharmony_cienum pci_protocol_version_t {
6562306a36Sopenharmony_ci	PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1),	/* Win10 */
6662306a36Sopenharmony_ci	PCI_PROTOCOL_VERSION_1_2 = PCI_MAKE_VERSION(1, 2),	/* RS1 */
6762306a36Sopenharmony_ci	PCI_PROTOCOL_VERSION_1_3 = PCI_MAKE_VERSION(1, 3),	/* Vibranium */
6862306a36Sopenharmony_ci	PCI_PROTOCOL_VERSION_1_4 = PCI_MAKE_VERSION(1, 4),	/* WS2022 */
6962306a36Sopenharmony_ci};
7062306a36Sopenharmony_ci
/* All-ones unsigned long long value. */
#define CPU_AFFINITY_ALL	(~0ULL)
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_ci/*
7462306a36Sopenharmony_ci * Supported protocol versions in the order of probing - highest go
7562306a36Sopenharmony_ci * first.
7662306a36Sopenharmony_ci */
7762306a36Sopenharmony_cistatic enum pci_protocol_version_t pci_protocol_versions[] = {
7862306a36Sopenharmony_ci	PCI_PROTOCOL_VERSION_1_4,
7962306a36Sopenharmony_ci	PCI_PROTOCOL_VERSION_1_3,
8062306a36Sopenharmony_ci	PCI_PROTOCOL_VERSION_1_2,
8162306a36Sopenharmony_ci	PCI_PROTOCOL_VERSION_1_1,
8262306a36Sopenharmony_ci};
8362306a36Sopenharmony_ci
/*
 * Config-space MMIO window: CFG_PAGE_SIZE is whatever remains of the
 * PCI_CONFIG_MMIO_LENGTH-byte window beyond CFG_PAGE_OFFSET.
 */
#define PCI_CONFIG_MMIO_LENGTH	0x2000
#define CFG_PAGE_OFFSET		0x1000
#define CFG_PAGE_SIZE		(PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET)

#define MAX_SUPPORTED_MSI_MESSAGES	0x400

#define STATUS_REVISION_MISMATCH	0xC0000059

/*
 * Room for a 32-bit serial number rendered as a string.
 */
#define SLOT_NAME_SIZE		11

/*
 * VMbus requestor size.  More than one outstanding request has been
 * observed to be 'rare', so 64 entries should be generous enough that we
 * never run out.
 */
#define HV_PCI_RQSTOR_SIZE	64
10162306a36Sopenharmony_ci
/*
 * Message type codes.  Every code is PCI_MESSAGE_BASE plus a small offset;
 * offsets 2 and 3 are not assigned.  The list starts with the version 1.1
 * messages.
 */
enum pci_message_type {
	PCI_MESSAGE_BASE		= 0x42490000,

	/* Version 1.1 */
	PCI_BUS_RELATIONS		= PCI_MESSAGE_BASE + 0x00,
	PCI_QUERY_BUS_RELATIONS		= PCI_MESSAGE_BASE + 0x01,
	PCI_POWER_STATE_CHANGE		= PCI_MESSAGE_BASE + 0x04,
	PCI_QUERY_RESOURCE_REQUIREMENTS	= PCI_MESSAGE_BASE + 0x05,
	PCI_QUERY_RESOURCE_RESOURCES	= PCI_MESSAGE_BASE + 0x06,
	PCI_BUS_D0ENTRY			= PCI_MESSAGE_BASE + 0x07,
	PCI_BUS_D0EXIT			= PCI_MESSAGE_BASE + 0x08,
	PCI_READ_BLOCK			= PCI_MESSAGE_BASE + 0x09,
	PCI_WRITE_BLOCK			= PCI_MESSAGE_BASE + 0x0A,
	PCI_EJECT			= PCI_MESSAGE_BASE + 0x0B,
	PCI_QUERY_STOP			= PCI_MESSAGE_BASE + 0x0C,
	PCI_REENABLE			= PCI_MESSAGE_BASE + 0x0D,
	PCI_QUERY_STOP_FAILED		= PCI_MESSAGE_BASE + 0x0E,
	PCI_EJECTION_COMPLETE		= PCI_MESSAGE_BASE + 0x0F,
	PCI_RESOURCES_ASSIGNED		= PCI_MESSAGE_BASE + 0x10,
	PCI_RESOURCES_RELEASED		= PCI_MESSAGE_BASE + 0x11,
	PCI_INVALIDATE_BLOCK		= PCI_MESSAGE_BASE + 0x12,
	PCI_QUERY_PROTOCOL_VERSION	= PCI_MESSAGE_BASE + 0x13,
	PCI_CREATE_INTERRUPT_MESSAGE	= PCI_MESSAGE_BASE + 0x14,
	PCI_DELETE_INTERRUPT_MESSAGE	= PCI_MESSAGE_BASE + 0x15,
	PCI_RESOURCES_ASSIGNED2		= PCI_MESSAGE_BASE + 0x16,
	PCI_CREATE_INTERRUPT_MESSAGE2	= PCI_MESSAGE_BASE + 0x17,
	/* unused */
	PCI_DELETE_INTERRUPT_MESSAGE2	= PCI_MESSAGE_BASE + 0x18,
	PCI_BUS_RELATIONS2		= PCI_MESSAGE_BASE + 0x19,
	PCI_RESOURCES_ASSIGNED3		= PCI_MESSAGE_BASE + 0x1A,
	PCI_CREATE_INTERRUPT_MESSAGE3	= PCI_MESSAGE_BASE + 0x1B,

	/* One past the last assigned code. */
	PCI_MESSAGE_MAXIMUM
};
13962306a36Sopenharmony_ci
14062306a36Sopenharmony_ci/*
14162306a36Sopenharmony_ci * Structures defining the virtual PCI Express protocol.
14262306a36Sopenharmony_ci */
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ciunion pci_version {
14562306a36Sopenharmony_ci	struct {
14662306a36Sopenharmony_ci		u16 minor_version;
14762306a36Sopenharmony_ci		u16 major_version;
14862306a36Sopenharmony_ci	} parts;
14962306a36Sopenharmony_ci	u32 version;
15062306a36Sopenharmony_ci} __packed;
15162306a36Sopenharmony_ci
15262306a36Sopenharmony_ci/*
15362306a36Sopenharmony_ci * Function numbers are 8-bits wide on Express, as interpreted through ARI,
15462306a36Sopenharmony_ci * which is all this driver does.  This representation is the one used in
15562306a36Sopenharmony_ci * Windows, which is what is expected when sending this back and forth with
15662306a36Sopenharmony_ci * the Hyper-V parent partition.
15762306a36Sopenharmony_ci */
15862306a36Sopenharmony_ciunion win_slot_encoding {
15962306a36Sopenharmony_ci	struct {
16062306a36Sopenharmony_ci		u32	dev:5;
16162306a36Sopenharmony_ci		u32	func:3;
16262306a36Sopenharmony_ci		u32	reserved:24;
16362306a36Sopenharmony_ci	} bits;
16462306a36Sopenharmony_ci	u32 slot;
16562306a36Sopenharmony_ci} __packed;
16662306a36Sopenharmony_ci
16762306a36Sopenharmony_ci/*
16862306a36Sopenharmony_ci * Pretty much as defined in the PCI Specifications.
16962306a36Sopenharmony_ci */
17062306a36Sopenharmony_cistruct pci_function_description {
17162306a36Sopenharmony_ci	u16	v_id;	/* vendor ID */
17262306a36Sopenharmony_ci	u16	d_id;	/* device ID */
17362306a36Sopenharmony_ci	u8	rev;
17462306a36Sopenharmony_ci	u8	prog_intf;
17562306a36Sopenharmony_ci	u8	subclass;
17662306a36Sopenharmony_ci	u8	base_class;
17762306a36Sopenharmony_ci	u32	subsystem_id;
17862306a36Sopenharmony_ci	union win_slot_encoding win_slot;
17962306a36Sopenharmony_ci	u32	ser;	/* serial number */
18062306a36Sopenharmony_ci} __packed;
18162306a36Sopenharmony_ci
/* Flag values for a device description. */
enum pci_device_description_flags {
	HV_PCI_DEVICE_FLAG_NONE			= 0,
	HV_PCI_DEVICE_FLAG_NUMA_AFFINITY	= 1,
};
18662306a36Sopenharmony_ci
18762306a36Sopenharmony_cistruct pci_function_description2 {
18862306a36Sopenharmony_ci	u16	v_id;	/* vendor ID */
18962306a36Sopenharmony_ci	u16	d_id;	/* device ID */
19062306a36Sopenharmony_ci	u8	rev;
19162306a36Sopenharmony_ci	u8	prog_intf;
19262306a36Sopenharmony_ci	u8	subclass;
19362306a36Sopenharmony_ci	u8	base_class;
19462306a36Sopenharmony_ci	u32	subsystem_id;
19562306a36Sopenharmony_ci	union	win_slot_encoding win_slot;
19662306a36Sopenharmony_ci	u32	ser;	/* serial number */
19762306a36Sopenharmony_ci	u32	flags;
19862306a36Sopenharmony_ci	u16	virtual_numa_node;
19962306a36Sopenharmony_ci	u16	reserved;
20062306a36Sopenharmony_ci} __packed;
20162306a36Sopenharmony_ci
20262306a36Sopenharmony_ci/**
20362306a36Sopenharmony_ci * struct hv_msi_desc
20462306a36Sopenharmony_ci * @vector:		IDT entry
20562306a36Sopenharmony_ci * @delivery_mode:	As defined in Intel's Programmer's
20662306a36Sopenharmony_ci *			Reference Manual, Volume 3, Chapter 8.
20762306a36Sopenharmony_ci * @vector_count:	Number of contiguous entries in the
20862306a36Sopenharmony_ci *			Interrupt Descriptor Table that are
20962306a36Sopenharmony_ci *			occupied by this Message-Signaled
21062306a36Sopenharmony_ci *			Interrupt. For "MSI", as first defined
21162306a36Sopenharmony_ci *			in PCI 2.2, this can be between 1 and
21262306a36Sopenharmony_ci *			32. For "MSI-X," as first defined in PCI
21362306a36Sopenharmony_ci *			3.0, this must be 1, as each MSI-X table
21462306a36Sopenharmony_ci *			entry would have its own descriptor.
21562306a36Sopenharmony_ci * @reserved:		Empty space
21662306a36Sopenharmony_ci * @cpu_mask:		All the target virtual processors.
21762306a36Sopenharmony_ci */
21862306a36Sopenharmony_cistruct hv_msi_desc {
21962306a36Sopenharmony_ci	u8	vector;
22062306a36Sopenharmony_ci	u8	delivery_mode;
22162306a36Sopenharmony_ci	u16	vector_count;
22262306a36Sopenharmony_ci	u32	reserved;
22362306a36Sopenharmony_ci	u64	cpu_mask;
22462306a36Sopenharmony_ci} __packed;
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_ci/**
22762306a36Sopenharmony_ci * struct hv_msi_desc2 - 1.2 version of hv_msi_desc
22862306a36Sopenharmony_ci * @vector:		IDT entry
22962306a36Sopenharmony_ci * @delivery_mode:	As defined in Intel's Programmer's
23062306a36Sopenharmony_ci *			Reference Manual, Volume 3, Chapter 8.
23162306a36Sopenharmony_ci * @vector_count:	Number of contiguous entries in the
23262306a36Sopenharmony_ci *			Interrupt Descriptor Table that are
23362306a36Sopenharmony_ci *			occupied by this Message-Signaled
23462306a36Sopenharmony_ci *			Interrupt. For "MSI", as first defined
23562306a36Sopenharmony_ci *			in PCI 2.2, this can be between 1 and
23662306a36Sopenharmony_ci *			32. For "MSI-X," as first defined in PCI
23762306a36Sopenharmony_ci *			3.0, this must be 1, as each MSI-X table
23862306a36Sopenharmony_ci *			entry would have its own descriptor.
23962306a36Sopenharmony_ci * @processor_count:	number of bits enabled in array.
24062306a36Sopenharmony_ci * @processor_array:	All the target virtual processors.
24162306a36Sopenharmony_ci */
24262306a36Sopenharmony_cistruct hv_msi_desc2 {
24362306a36Sopenharmony_ci	u8	vector;
24462306a36Sopenharmony_ci	u8	delivery_mode;
24562306a36Sopenharmony_ci	u16	vector_count;
24662306a36Sopenharmony_ci	u16	processor_count;
24762306a36Sopenharmony_ci	u16	processor_array[32];
24862306a36Sopenharmony_ci} __packed;
24962306a36Sopenharmony_ci
25062306a36Sopenharmony_ci/*
25162306a36Sopenharmony_ci * struct hv_msi_desc3 - 1.3 version of hv_msi_desc
25262306a36Sopenharmony_ci *	Everything is the same as in 'hv_msi_desc2' except that the size of the
25362306a36Sopenharmony_ci *	'vector' field is larger to support bigger vector values. For ex: LPI
25462306a36Sopenharmony_ci *	vectors on ARM.
25562306a36Sopenharmony_ci */
25662306a36Sopenharmony_cistruct hv_msi_desc3 {
25762306a36Sopenharmony_ci	u32	vector;
25862306a36Sopenharmony_ci	u8	delivery_mode;
25962306a36Sopenharmony_ci	u8	reserved;
26062306a36Sopenharmony_ci	u16	vector_count;
26162306a36Sopenharmony_ci	u16	processor_count;
26262306a36Sopenharmony_ci	u16	processor_array[32];
26362306a36Sopenharmony_ci} __packed;
26462306a36Sopenharmony_ci
26562306a36Sopenharmony_ci/**
26662306a36Sopenharmony_ci * struct tran_int_desc
26762306a36Sopenharmony_ci * @reserved:		unused, padding
26862306a36Sopenharmony_ci * @vector_count:	same as in hv_msi_desc
26962306a36Sopenharmony_ci * @data:		This is the "data payload" value that is
27062306a36Sopenharmony_ci *			written by the device when it generates
27162306a36Sopenharmony_ci *			a message-signaled interrupt, either MSI
27262306a36Sopenharmony_ci *			or MSI-X.
27362306a36Sopenharmony_ci * @address:		This is the address to which the data
27462306a36Sopenharmony_ci *			payload is written on interrupt
27562306a36Sopenharmony_ci *			generation.
27662306a36Sopenharmony_ci */
27762306a36Sopenharmony_cistruct tran_int_desc {
27862306a36Sopenharmony_ci	u16	reserved;
27962306a36Sopenharmony_ci	u16	vector_count;
28062306a36Sopenharmony_ci	u32	data;
28162306a36Sopenharmony_ci	u64	address;
28262306a36Sopenharmony_ci} __packed;
28362306a36Sopenharmony_ci
28462306a36Sopenharmony_ci/*
28562306a36Sopenharmony_ci * A generic message format for virtual PCI.
28662306a36Sopenharmony_ci * Specific message formats are defined later in the file.
28762306a36Sopenharmony_ci */
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_cistruct pci_message {
29062306a36Sopenharmony_ci	u32 type;
29162306a36Sopenharmony_ci} __packed;
29262306a36Sopenharmony_ci
29362306a36Sopenharmony_cistruct pci_child_message {
29462306a36Sopenharmony_ci	struct pci_message message_type;
29562306a36Sopenharmony_ci	union win_slot_encoding wslot;
29662306a36Sopenharmony_ci} __packed;
29762306a36Sopenharmony_ci
29862306a36Sopenharmony_cistruct pci_incoming_message {
29962306a36Sopenharmony_ci	struct vmpacket_descriptor hdr;
30062306a36Sopenharmony_ci	struct pci_message message_type;
30162306a36Sopenharmony_ci} __packed;
30262306a36Sopenharmony_ci
30362306a36Sopenharmony_cistruct pci_response {
30462306a36Sopenharmony_ci	struct vmpacket_descriptor hdr;
30562306a36Sopenharmony_ci	s32 status;			/* negative values are failures */
30662306a36Sopenharmony_ci} __packed;
30762306a36Sopenharmony_ci
30862306a36Sopenharmony_cistruct pci_packet {
30962306a36Sopenharmony_ci	void (*completion_func)(void *context, struct pci_response *resp,
31062306a36Sopenharmony_ci				int resp_packet_size);
31162306a36Sopenharmony_ci	void *compl_ctxt;
31262306a36Sopenharmony_ci
31362306a36Sopenharmony_ci	struct pci_message message[];
31462306a36Sopenharmony_ci};
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ci/*
31762306a36Sopenharmony_ci * Specific message types supporting the PCI protocol.
31862306a36Sopenharmony_ci */
31962306a36Sopenharmony_ci
32062306a36Sopenharmony_ci/*
32162306a36Sopenharmony_ci * Version negotiation message. Sent from the guest to the host.
32262306a36Sopenharmony_ci * The guest is free to try different versions until the host
32362306a36Sopenharmony_ci * accepts the version.
32462306a36Sopenharmony_ci *
32562306a36Sopenharmony_ci * pci_version: The protocol version requested.
32662306a36Sopenharmony_ci * is_last_attempt: If TRUE, this is the last version guest will request.
32762306a36Sopenharmony_ci * reservedz: Reserved field, set to zero.
32862306a36Sopenharmony_ci */
32962306a36Sopenharmony_ci
33062306a36Sopenharmony_cistruct pci_version_request {
33162306a36Sopenharmony_ci	struct pci_message message_type;
33262306a36Sopenharmony_ci	u32 protocol_version;
33362306a36Sopenharmony_ci} __packed;
33462306a36Sopenharmony_ci
33562306a36Sopenharmony_ci/*
33662306a36Sopenharmony_ci * Bus D0 Entry.  This is sent from the guest to the host when the virtual
33762306a36Sopenharmony_ci * bus (PCI Express port) is ready for action.
33862306a36Sopenharmony_ci */
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_cistruct pci_bus_d0_entry {
34162306a36Sopenharmony_ci	struct pci_message message_type;
34262306a36Sopenharmony_ci	u32 reserved;
34362306a36Sopenharmony_ci	u64 mmio_base;
34462306a36Sopenharmony_ci} __packed;
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_cistruct pci_bus_relations {
34762306a36Sopenharmony_ci	struct pci_incoming_message incoming;
34862306a36Sopenharmony_ci	u32 device_count;
34962306a36Sopenharmony_ci	struct pci_function_description func[];
35062306a36Sopenharmony_ci} __packed;
35162306a36Sopenharmony_ci
35262306a36Sopenharmony_cistruct pci_bus_relations2 {
35362306a36Sopenharmony_ci	struct pci_incoming_message incoming;
35462306a36Sopenharmony_ci	u32 device_count;
35562306a36Sopenharmony_ci	struct pci_function_description2 func[];
35662306a36Sopenharmony_ci} __packed;
35762306a36Sopenharmony_ci
35862306a36Sopenharmony_cistruct pci_q_res_req_response {
35962306a36Sopenharmony_ci	struct vmpacket_descriptor hdr;
36062306a36Sopenharmony_ci	s32 status;			/* negative values are failures */
36162306a36Sopenharmony_ci	u32 probed_bar[PCI_STD_NUM_BARS];
36262306a36Sopenharmony_ci} __packed;
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_cistruct pci_set_power {
36562306a36Sopenharmony_ci	struct pci_message message_type;
36662306a36Sopenharmony_ci	union win_slot_encoding wslot;
36762306a36Sopenharmony_ci	u32 power_state;		/* In Windows terms */
36862306a36Sopenharmony_ci	u32 reserved;
36962306a36Sopenharmony_ci} __packed;
37062306a36Sopenharmony_ci
37162306a36Sopenharmony_cistruct pci_set_power_response {
37262306a36Sopenharmony_ci	struct vmpacket_descriptor hdr;
37362306a36Sopenharmony_ci	s32 status;			/* negative values are failures */
37462306a36Sopenharmony_ci	union win_slot_encoding wslot;
37562306a36Sopenharmony_ci	u32 resultant_state;		/* In Windows terms */
37662306a36Sopenharmony_ci	u32 reserved;
37762306a36Sopenharmony_ci} __packed;
37862306a36Sopenharmony_ci
37962306a36Sopenharmony_cistruct pci_resources_assigned {
38062306a36Sopenharmony_ci	struct pci_message message_type;
38162306a36Sopenharmony_ci	union win_slot_encoding wslot;
38262306a36Sopenharmony_ci	u8 memory_range[0x14][6];	/* not used here */
38362306a36Sopenharmony_ci	u32 msi_descriptors;
38462306a36Sopenharmony_ci	u32 reserved[4];
38562306a36Sopenharmony_ci} __packed;
38662306a36Sopenharmony_ci
38762306a36Sopenharmony_cistruct pci_resources_assigned2 {
38862306a36Sopenharmony_ci	struct pci_message message_type;
38962306a36Sopenharmony_ci	union win_slot_encoding wslot;
39062306a36Sopenharmony_ci	u8 memory_range[0x14][6];	/* not used here */
39162306a36Sopenharmony_ci	u32 msi_descriptor_count;
39262306a36Sopenharmony_ci	u8 reserved[70];
39362306a36Sopenharmony_ci} __packed;
39462306a36Sopenharmony_ci
39562306a36Sopenharmony_cistruct pci_create_interrupt {
39662306a36Sopenharmony_ci	struct pci_message message_type;
39762306a36Sopenharmony_ci	union win_slot_encoding wslot;
39862306a36Sopenharmony_ci	struct hv_msi_desc int_desc;
39962306a36Sopenharmony_ci} __packed;
40062306a36Sopenharmony_ci
40162306a36Sopenharmony_cistruct pci_create_int_response {
40262306a36Sopenharmony_ci	struct pci_response response;
40362306a36Sopenharmony_ci	u32 reserved;
40462306a36Sopenharmony_ci	struct tran_int_desc int_desc;
40562306a36Sopenharmony_ci} __packed;
40662306a36Sopenharmony_ci
40762306a36Sopenharmony_cistruct pci_create_interrupt2 {
40862306a36Sopenharmony_ci	struct pci_message message_type;
40962306a36Sopenharmony_ci	union win_slot_encoding wslot;
41062306a36Sopenharmony_ci	struct hv_msi_desc2 int_desc;
41162306a36Sopenharmony_ci} __packed;
41262306a36Sopenharmony_ci
41362306a36Sopenharmony_cistruct pci_create_interrupt3 {
41462306a36Sopenharmony_ci	struct pci_message message_type;
41562306a36Sopenharmony_ci	union win_slot_encoding wslot;
41662306a36Sopenharmony_ci	struct hv_msi_desc3 int_desc;
41762306a36Sopenharmony_ci} __packed;
41862306a36Sopenharmony_ci
41962306a36Sopenharmony_cistruct pci_delete_interrupt {
42062306a36Sopenharmony_ci	struct pci_message message_type;
42162306a36Sopenharmony_ci	union win_slot_encoding wslot;
42262306a36Sopenharmony_ci	struct tran_int_desc int_desc;
42362306a36Sopenharmony_ci} __packed;
42462306a36Sopenharmony_ci
42562306a36Sopenharmony_ci/*
42662306a36Sopenharmony_ci * Note: the VM must pass a valid block id, wslot and bytes_requested.
42762306a36Sopenharmony_ci */
42862306a36Sopenharmony_cistruct pci_read_block {
42962306a36Sopenharmony_ci	struct pci_message message_type;
43062306a36Sopenharmony_ci	u32 block_id;
43162306a36Sopenharmony_ci	union win_slot_encoding wslot;
43262306a36Sopenharmony_ci	u32 bytes_requested;
43362306a36Sopenharmony_ci} __packed;
43462306a36Sopenharmony_ci
43562306a36Sopenharmony_cistruct pci_read_block_response {
43662306a36Sopenharmony_ci	struct vmpacket_descriptor hdr;
43762306a36Sopenharmony_ci	u32 status;
43862306a36Sopenharmony_ci	u8 bytes[HV_CONFIG_BLOCK_SIZE_MAX];
43962306a36Sopenharmony_ci} __packed;
44062306a36Sopenharmony_ci
44162306a36Sopenharmony_ci/*
44262306a36Sopenharmony_ci * Note: the VM must pass a valid block id, wslot and byte_count.
44362306a36Sopenharmony_ci */
44462306a36Sopenharmony_cistruct pci_write_block {
44562306a36Sopenharmony_ci	struct pci_message message_type;
44662306a36Sopenharmony_ci	u32 block_id;
44762306a36Sopenharmony_ci	union win_slot_encoding wslot;
44862306a36Sopenharmony_ci	u32 byte_count;
44962306a36Sopenharmony_ci	u8 bytes[HV_CONFIG_BLOCK_SIZE_MAX];
45062306a36Sopenharmony_ci} __packed;
45162306a36Sopenharmony_ci
45262306a36Sopenharmony_cistruct pci_dev_inval_block {
45362306a36Sopenharmony_ci	struct pci_incoming_message incoming;
45462306a36Sopenharmony_ci	union win_slot_encoding wslot;
45562306a36Sopenharmony_ci	u64 block_mask;
45662306a36Sopenharmony_ci} __packed;
45762306a36Sopenharmony_ci
45862306a36Sopenharmony_cistruct pci_dev_incoming {
45962306a36Sopenharmony_ci	struct pci_incoming_message incoming;
46062306a36Sopenharmony_ci	union win_slot_encoding wslot;
46162306a36Sopenharmony_ci} __packed;
46262306a36Sopenharmony_ci
46362306a36Sopenharmony_cistruct pci_eject_response {
46462306a36Sopenharmony_ci	struct pci_message message_type;
46562306a36Sopenharmony_ci	union win_slot_encoding wslot;
46662306a36Sopenharmony_ci	u32 status;
46762306a36Sopenharmony_ci} __packed;
46862306a36Sopenharmony_ci
46962306a36Sopenharmony_cistatic int pci_ring_size = VMBUS_RING_SIZE(SZ_16K);
47062306a36Sopenharmony_ci
47162306a36Sopenharmony_ci/*
47262306a36Sopenharmony_ci * Driver specific state.
47362306a36Sopenharmony_ci */
47462306a36Sopenharmony_ci
/* States of a bus, numbered consecutively from zero. */
enum hv_pcibus_state {
	hv_pcibus_init,
	hv_pcibus_probed,
	hv_pcibus_installed,
	hv_pcibus_removing,
	hv_pcibus_maximum	/* number of states above */
};
48262306a36Sopenharmony_ci
48362306a36Sopenharmony_cistruct hv_pcibus_device {
48462306a36Sopenharmony_ci#ifdef CONFIG_X86
48562306a36Sopenharmony_ci	struct pci_sysdata sysdata;
48662306a36Sopenharmony_ci#elif defined(CONFIG_ARM64)
48762306a36Sopenharmony_ci	struct pci_config_window sysdata;
48862306a36Sopenharmony_ci#endif
48962306a36Sopenharmony_ci	struct pci_host_bridge *bridge;
49062306a36Sopenharmony_ci	struct fwnode_handle *fwnode;
49162306a36Sopenharmony_ci	/* Protocol version negotiated with the host */
49262306a36Sopenharmony_ci	enum pci_protocol_version_t protocol_version;
49362306a36Sopenharmony_ci
49462306a36Sopenharmony_ci	struct mutex state_lock;
49562306a36Sopenharmony_ci	enum hv_pcibus_state state;
49662306a36Sopenharmony_ci
49762306a36Sopenharmony_ci	struct hv_device *hdev;
49862306a36Sopenharmony_ci	resource_size_t low_mmio_space;
49962306a36Sopenharmony_ci	resource_size_t high_mmio_space;
50062306a36Sopenharmony_ci	struct resource *mem_config;
50162306a36Sopenharmony_ci	struct resource *low_mmio_res;
50262306a36Sopenharmony_ci	struct resource *high_mmio_res;
50362306a36Sopenharmony_ci	struct completion *survey_event;
50462306a36Sopenharmony_ci	struct pci_bus *pci_bus;
50562306a36Sopenharmony_ci	spinlock_t config_lock;	/* Avoid two threads writing index page */
50662306a36Sopenharmony_ci	spinlock_t device_list_lock;	/* Protect lists below */
50762306a36Sopenharmony_ci	void __iomem *cfg_addr;
50862306a36Sopenharmony_ci
50962306a36Sopenharmony_ci	struct list_head children;
51062306a36Sopenharmony_ci	struct list_head dr_list;
51162306a36Sopenharmony_ci
51262306a36Sopenharmony_ci	struct msi_domain_info msi_info;
51362306a36Sopenharmony_ci	struct irq_domain *irq_domain;
51462306a36Sopenharmony_ci
51562306a36Sopenharmony_ci	struct workqueue_struct *wq;
51662306a36Sopenharmony_ci
51762306a36Sopenharmony_ci	/* Highest slot of child device with resources allocated */
51862306a36Sopenharmony_ci	int wslot_res_allocated;
51962306a36Sopenharmony_ci	bool use_calls; /* Use hypercalls to access mmio cfg space */
52062306a36Sopenharmony_ci};
52162306a36Sopenharmony_ci
52262306a36Sopenharmony_ci/*
52362306a36Sopenharmony_ci * Tracks "Device Relations" messages from the host, which must be both
52462306a36Sopenharmony_ci * processed in order and deferred so that they don't run in the context
52562306a36Sopenharmony_ci * of the incoming packet callback.
52662306a36Sopenharmony_ci */
52762306a36Sopenharmony_cistruct hv_dr_work {
52862306a36Sopenharmony_ci	struct work_struct wrk;
52962306a36Sopenharmony_ci	struct hv_pcibus_device *bus;
53062306a36Sopenharmony_ci};
53162306a36Sopenharmony_ci
53262306a36Sopenharmony_cistruct hv_pcidev_description {
53362306a36Sopenharmony_ci	u16	v_id;	/* vendor ID */
53462306a36Sopenharmony_ci	u16	d_id;	/* device ID */
53562306a36Sopenharmony_ci	u8	rev;
53662306a36Sopenharmony_ci	u8	prog_intf;
53762306a36Sopenharmony_ci	u8	subclass;
53862306a36Sopenharmony_ci	u8	base_class;
53962306a36Sopenharmony_ci	u32	subsystem_id;
54062306a36Sopenharmony_ci	union	win_slot_encoding win_slot;
54162306a36Sopenharmony_ci	u32	ser;	/* serial number */
54262306a36Sopenharmony_ci	u32	flags;
54362306a36Sopenharmony_ci	u16	virtual_numa_node;
54462306a36Sopenharmony_ci};
54562306a36Sopenharmony_ci
54662306a36Sopenharmony_cistruct hv_dr_state {
54762306a36Sopenharmony_ci	struct list_head list_entry;
54862306a36Sopenharmony_ci	u32 device_count;
54962306a36Sopenharmony_ci	struct hv_pcidev_description func[];
55062306a36Sopenharmony_ci};
55162306a36Sopenharmony_ci
55262306a36Sopenharmony_cistruct hv_pci_dev {
55362306a36Sopenharmony_ci	/* List protected by pci_rescan_remove_lock */
55462306a36Sopenharmony_ci	struct list_head list_entry;
55562306a36Sopenharmony_ci	refcount_t refs;
55662306a36Sopenharmony_ci	struct pci_slot *pci_slot;
55762306a36Sopenharmony_ci	struct hv_pcidev_description desc;
55862306a36Sopenharmony_ci	bool reported_missing;
55962306a36Sopenharmony_ci	struct hv_pcibus_device *hbus;
56062306a36Sopenharmony_ci	struct work_struct wrk;
56162306a36Sopenharmony_ci
56262306a36Sopenharmony_ci	void (*block_invalidate)(void *context, u64 block_mask);
56362306a36Sopenharmony_ci	void *invalidate_context;
56462306a36Sopenharmony_ci
56562306a36Sopenharmony_ci	/*
56662306a36Sopenharmony_ci	 * What would be observed if one wrote 0xFFFFFFFF to a BAR and then
56762306a36Sopenharmony_ci	 * read it back, for each of the BAR offsets within config space.
56862306a36Sopenharmony_ci	 */
56962306a36Sopenharmony_ci	u32 probed_bar[PCI_STD_NUM_BARS];
57062306a36Sopenharmony_ci};
57162306a36Sopenharmony_ci
57262306a36Sopenharmony_cistruct hv_pci_compl {
57362306a36Sopenharmony_ci	struct completion host_event;
57462306a36Sopenharmony_ci	s32 completion_status;
57562306a36Sopenharmony_ci};
57662306a36Sopenharmony_ci
57762306a36Sopenharmony_cistatic void hv_pci_onchannelcallback(void *context);
57862306a36Sopenharmony_ci
57962306a36Sopenharmony_ci#ifdef CONFIG_X86
/* x86 choices: fixed APIC delivery mode and edge flow handling. */
#define DELIVERY_MODE		APIC_DELIVERY_MODE_FIXED
#define FLOW_HANDLER		handle_edge_irq
#define FLOW_NAME		"edge"
58362306a36Sopenharmony_ci
/*
 * hv_pci_irqchip_init() - x86 variant.
 *
 * Does no work in this configuration.
 *
 * Return: always 0.
 */
static int hv_pci_irqchip_init(void)
{
	return 0;
}
58862306a36Sopenharmony_ci
58962306a36Sopenharmony_cistatic struct irq_domain *hv_pci_get_root_domain(void)
59062306a36Sopenharmony_ci{
59162306a36Sopenharmony_ci	return x86_vector_domain;
59262306a36Sopenharmony_ci}
59362306a36Sopenharmony_ci
59462306a36Sopenharmony_cistatic unsigned int hv_msi_get_int_vector(struct irq_data *data)
59562306a36Sopenharmony_ci{
59662306a36Sopenharmony_ci	struct irq_cfg *cfg = irqd_cfg(data);
59762306a36Sopenharmony_ci
59862306a36Sopenharmony_ci	return cfg->vector;
59962306a36Sopenharmony_ci}
60062306a36Sopenharmony_ci
/* In this configuration hv_msi_prepare is simply pci_msi_prepare. */
#define hv_msi_prepare		pci_msi_prepare
60262306a36Sopenharmony_ci
60362306a36Sopenharmony_ci/**
60462306a36Sopenharmony_ci * hv_arch_irq_unmask() - "Unmask" the IRQ by setting its current
60562306a36Sopenharmony_ci * affinity.
60662306a36Sopenharmony_ci * @data:	Describes the IRQ
60762306a36Sopenharmony_ci *
60862306a36Sopenharmony_ci * Build new a destination for the MSI and make a hypercall to
60962306a36Sopenharmony_ci * update the Interrupt Redirection Table. "Device Logical ID"
61062306a36Sopenharmony_ci * is built out of this PCI bus's instance GUID and the function
61162306a36Sopenharmony_ci * number of the device.
61262306a36Sopenharmony_ci */
61362306a36Sopenharmony_cistatic void hv_arch_irq_unmask(struct irq_data *data)
61462306a36Sopenharmony_ci{
61562306a36Sopenharmony_ci	struct msi_desc *msi_desc = irq_data_get_msi_desc(data);
61662306a36Sopenharmony_ci	struct hv_retarget_device_interrupt *params;
61762306a36Sopenharmony_ci	struct tran_int_desc *int_desc;
61862306a36Sopenharmony_ci	struct hv_pcibus_device *hbus;
61962306a36Sopenharmony_ci	const struct cpumask *dest;
62062306a36Sopenharmony_ci	cpumask_var_t tmp;
62162306a36Sopenharmony_ci	struct pci_bus *pbus;
62262306a36Sopenharmony_ci	struct pci_dev *pdev;
62362306a36Sopenharmony_ci	unsigned long flags;
62462306a36Sopenharmony_ci	u32 var_size = 0;
62562306a36Sopenharmony_ci	int cpu, nr_bank;
62662306a36Sopenharmony_ci	u64 res;
62762306a36Sopenharmony_ci
62862306a36Sopenharmony_ci	dest = irq_data_get_effective_affinity_mask(data);
62962306a36Sopenharmony_ci	pdev = msi_desc_to_pci_dev(msi_desc);
63062306a36Sopenharmony_ci	pbus = pdev->bus;
63162306a36Sopenharmony_ci	hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
63262306a36Sopenharmony_ci	int_desc = data->chip_data;
63362306a36Sopenharmony_ci	if (!int_desc) {
63462306a36Sopenharmony_ci		dev_warn(&hbus->hdev->device, "%s() can not unmask irq %u\n",
63562306a36Sopenharmony_ci			 __func__, data->irq);
63662306a36Sopenharmony_ci		return;
63762306a36Sopenharmony_ci	}
63862306a36Sopenharmony_ci
63962306a36Sopenharmony_ci	local_irq_save(flags);
64062306a36Sopenharmony_ci
64162306a36Sopenharmony_ci	params = *this_cpu_ptr(hyperv_pcpu_input_arg);
64262306a36Sopenharmony_ci	memset(params, 0, sizeof(*params));
64362306a36Sopenharmony_ci	params->partition_id = HV_PARTITION_ID_SELF;
64462306a36Sopenharmony_ci	params->int_entry.source = HV_INTERRUPT_SOURCE_MSI;
64562306a36Sopenharmony_ci	params->int_entry.msi_entry.address.as_uint32 = int_desc->address & 0xffffffff;
64662306a36Sopenharmony_ci	params->int_entry.msi_entry.data.as_uint32 = int_desc->data;
64762306a36Sopenharmony_ci	params->device_id = (hbus->hdev->dev_instance.b[5] << 24) |
64862306a36Sopenharmony_ci			   (hbus->hdev->dev_instance.b[4] << 16) |
64962306a36Sopenharmony_ci			   (hbus->hdev->dev_instance.b[7] << 8) |
65062306a36Sopenharmony_ci			   (hbus->hdev->dev_instance.b[6] & 0xf8) |
65162306a36Sopenharmony_ci			   PCI_FUNC(pdev->devfn);
65262306a36Sopenharmony_ci	params->int_target.vector = hv_msi_get_int_vector(data);
65362306a36Sopenharmony_ci
65462306a36Sopenharmony_ci	/*
65562306a36Sopenharmony_ci	 * Honoring apic->delivery_mode set to APIC_DELIVERY_MODE_FIXED by
65662306a36Sopenharmony_ci	 * setting the HV_DEVICE_INTERRUPT_TARGET_MULTICAST flag results in a
65762306a36Sopenharmony_ci	 * spurious interrupt storm. Not doing so does not seem to have a
65862306a36Sopenharmony_ci	 * negative effect (yet?).
65962306a36Sopenharmony_ci	 */
66062306a36Sopenharmony_ci
66162306a36Sopenharmony_ci	if (hbus->protocol_version >= PCI_PROTOCOL_VERSION_1_2) {
66262306a36Sopenharmony_ci		/*
66362306a36Sopenharmony_ci		 * PCI_PROTOCOL_VERSION_1_2 supports the VP_SET version of the
66462306a36Sopenharmony_ci		 * HVCALL_RETARGET_INTERRUPT hypercall, which also coincides
66562306a36Sopenharmony_ci		 * with >64 VP support.
66662306a36Sopenharmony_ci		 * ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED
66762306a36Sopenharmony_ci		 * is not sufficient for this hypercall.
66862306a36Sopenharmony_ci		 */
66962306a36Sopenharmony_ci		params->int_target.flags |=
67062306a36Sopenharmony_ci			HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET;
67162306a36Sopenharmony_ci
67262306a36Sopenharmony_ci		if (!alloc_cpumask_var(&tmp, GFP_ATOMIC)) {
67362306a36Sopenharmony_ci			res = 1;
67462306a36Sopenharmony_ci			goto out;
67562306a36Sopenharmony_ci		}
67662306a36Sopenharmony_ci
67762306a36Sopenharmony_ci		cpumask_and(tmp, dest, cpu_online_mask);
67862306a36Sopenharmony_ci		nr_bank = cpumask_to_vpset(&params->int_target.vp_set, tmp);
67962306a36Sopenharmony_ci		free_cpumask_var(tmp);
68062306a36Sopenharmony_ci
68162306a36Sopenharmony_ci		if (nr_bank <= 0) {
68262306a36Sopenharmony_ci			res = 1;
68362306a36Sopenharmony_ci			goto out;
68462306a36Sopenharmony_ci		}
68562306a36Sopenharmony_ci
68662306a36Sopenharmony_ci		/*
68762306a36Sopenharmony_ci		 * var-sized hypercall, var-size starts after vp_mask (thus
68862306a36Sopenharmony_ci		 * vp_set.format does not count, but vp_set.valid_bank_mask
68962306a36Sopenharmony_ci		 * does).
69062306a36Sopenharmony_ci		 */
69162306a36Sopenharmony_ci		var_size = 1 + nr_bank;
69262306a36Sopenharmony_ci	} else {
69362306a36Sopenharmony_ci		for_each_cpu_and(cpu, dest, cpu_online_mask) {
69462306a36Sopenharmony_ci			params->int_target.vp_mask |=
69562306a36Sopenharmony_ci				(1ULL << hv_cpu_number_to_vp_number(cpu));
69662306a36Sopenharmony_ci		}
69762306a36Sopenharmony_ci	}
69862306a36Sopenharmony_ci
69962306a36Sopenharmony_ci	res = hv_do_hypercall(HVCALL_RETARGET_INTERRUPT | (var_size << 17),
70062306a36Sopenharmony_ci			      params, NULL);
70162306a36Sopenharmony_ci
70262306a36Sopenharmony_ciout:
70362306a36Sopenharmony_ci	local_irq_restore(flags);
70462306a36Sopenharmony_ci
70562306a36Sopenharmony_ci	/*
70662306a36Sopenharmony_ci	 * During hibernation, when a CPU is offlined, the kernel tries
70762306a36Sopenharmony_ci	 * to move the interrupt to the remaining CPUs that haven't
70862306a36Sopenharmony_ci	 * been offlined yet. In this case, the below hv_do_hypercall()
70962306a36Sopenharmony_ci	 * always fails since the vmbus channel has been closed:
71062306a36Sopenharmony_ci	 * refer to cpu_disable_common() -> fixup_irqs() ->
71162306a36Sopenharmony_ci	 * irq_migrate_all_off_this_cpu() -> migrate_one_irq().
71262306a36Sopenharmony_ci	 *
71362306a36Sopenharmony_ci	 * Suppress the error message for hibernation because the failure
71462306a36Sopenharmony_ci	 * during hibernation does not matter (at this time all the devices
71562306a36Sopenharmony_ci	 * have been frozen). Note: the correct affinity info is still updated
71662306a36Sopenharmony_ci	 * into the irqdata data structure in migrate_one_irq() ->
71762306a36Sopenharmony_ci	 * irq_do_set_affinity(), so later when the VM resumes,
71862306a36Sopenharmony_ci	 * hv_pci_restore_msi_state() is able to correctly restore the
71962306a36Sopenharmony_ci	 * interrupt with the correct affinity.
72062306a36Sopenharmony_ci	 */
72162306a36Sopenharmony_ci	if (!hv_result_success(res) && hbus->state != hv_pcibus_removing)
72262306a36Sopenharmony_ci		dev_err(&hbus->hdev->device,
72362306a36Sopenharmony_ci			"%s() failed: %#llx", __func__, res);
72462306a36Sopenharmony_ci}
72562306a36Sopenharmony_ci#elif defined(CONFIG_ARM64)
72662306a36Sopenharmony_ci/*
72762306a36Sopenharmony_ci * SPI vectors to use for vPCI; arch SPIs range is [32, 1019], but leaving a bit
72862306a36Sopenharmony_ci * of room at the start to allow for SPIs to be specified through ACPI and
72962306a36Sopenharmony_ci * starting with a power of two to satisfy power of 2 multi-MSI requirement.
73062306a36Sopenharmony_ci */
73162306a36Sopenharmony_ci#define HV_PCI_MSI_SPI_START	64
73262306a36Sopenharmony_ci#define HV_PCI_MSI_SPI_NR	(1020 - HV_PCI_MSI_SPI_START)
73362306a36Sopenharmony_ci#define DELIVERY_MODE		0
73462306a36Sopenharmony_ci#define FLOW_HANDLER		NULL
73562306a36Sopenharmony_ci#define FLOW_NAME		NULL
73662306a36Sopenharmony_ci#define hv_msi_prepare		NULL
73762306a36Sopenharmony_ci
73862306a36Sopenharmony_cistruct hv_pci_chip_data {
73962306a36Sopenharmony_ci	DECLARE_BITMAP(spi_map, HV_PCI_MSI_SPI_NR);
74062306a36Sopenharmony_ci	struct mutex	map_lock;
74162306a36Sopenharmony_ci};
74262306a36Sopenharmony_ci
74362306a36Sopenharmony_ci/* Hyper-V vPCI MSI GIC IRQ domain */
74462306a36Sopenharmony_cistatic struct irq_domain *hv_msi_gic_irq_domain;
74562306a36Sopenharmony_ci
74662306a36Sopenharmony_ci/* Hyper-V PCI MSI IRQ chip */
74762306a36Sopenharmony_cistatic struct irq_chip hv_arm64_msi_irq_chip = {
74862306a36Sopenharmony_ci	.name = "MSI",
74962306a36Sopenharmony_ci	.irq_set_affinity = irq_chip_set_affinity_parent,
75062306a36Sopenharmony_ci	.irq_eoi = irq_chip_eoi_parent,
75162306a36Sopenharmony_ci	.irq_mask = irq_chip_mask_parent,
75262306a36Sopenharmony_ci	.irq_unmask = irq_chip_unmask_parent
75362306a36Sopenharmony_ci};
75462306a36Sopenharmony_ci
75562306a36Sopenharmony_cistatic unsigned int hv_msi_get_int_vector(struct irq_data *irqd)
75662306a36Sopenharmony_ci{
75762306a36Sopenharmony_ci	return irqd->parent_data->hwirq;
75862306a36Sopenharmony_ci}
75962306a36Sopenharmony_ci
76062306a36Sopenharmony_ci/*
76162306a36Sopenharmony_ci * @nr_bm_irqs:		Indicates the number of IRQs that were allocated from
76262306a36Sopenharmony_ci *			the bitmap.
76362306a36Sopenharmony_ci * @nr_dom_irqs:	Indicates the number of IRQs that were allocated from
76462306a36Sopenharmony_ci *			the parent domain.
76562306a36Sopenharmony_ci */
76662306a36Sopenharmony_cistatic void hv_pci_vec_irq_free(struct irq_domain *domain,
76762306a36Sopenharmony_ci				unsigned int virq,
76862306a36Sopenharmony_ci				unsigned int nr_bm_irqs,
76962306a36Sopenharmony_ci				unsigned int nr_dom_irqs)
77062306a36Sopenharmony_ci{
77162306a36Sopenharmony_ci	struct hv_pci_chip_data *chip_data = domain->host_data;
77262306a36Sopenharmony_ci	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
77362306a36Sopenharmony_ci	int first = d->hwirq - HV_PCI_MSI_SPI_START;
77462306a36Sopenharmony_ci	int i;
77562306a36Sopenharmony_ci
77662306a36Sopenharmony_ci	mutex_lock(&chip_data->map_lock);
77762306a36Sopenharmony_ci	bitmap_release_region(chip_data->spi_map,
77862306a36Sopenharmony_ci			      first,
77962306a36Sopenharmony_ci			      get_count_order(nr_bm_irqs));
78062306a36Sopenharmony_ci	mutex_unlock(&chip_data->map_lock);
78162306a36Sopenharmony_ci	for (i = 0; i < nr_dom_irqs; i++) {
78262306a36Sopenharmony_ci		if (i)
78362306a36Sopenharmony_ci			d = irq_domain_get_irq_data(domain, virq + i);
78462306a36Sopenharmony_ci		irq_domain_reset_irq_data(d);
78562306a36Sopenharmony_ci	}
78662306a36Sopenharmony_ci
78762306a36Sopenharmony_ci	irq_domain_free_irqs_parent(domain, virq, nr_dom_irqs);
78862306a36Sopenharmony_ci}
78962306a36Sopenharmony_ci
/*
 * irq_domain_ops.free callback: the same count was taken from both the
 * SPI bitmap and the parent domain, so release @nr_irqs from each.
 */
static void hv_pci_vec_irq_domain_free(struct irq_domain *domain,
				       unsigned int virq,
				       unsigned int nr_irqs)
{
	const unsigned int nr_bitmap = nr_irqs;
	const unsigned int nr_parent = nr_irqs;

	hv_pci_vec_irq_free(domain, virq, nr_bitmap, nr_parent);
}
79662306a36Sopenharmony_ci
79762306a36Sopenharmony_cistatic int hv_pci_vec_alloc_device_irq(struct irq_domain *domain,
79862306a36Sopenharmony_ci				       unsigned int nr_irqs,
79962306a36Sopenharmony_ci				       irq_hw_number_t *hwirq)
80062306a36Sopenharmony_ci{
80162306a36Sopenharmony_ci	struct hv_pci_chip_data *chip_data = domain->host_data;
80262306a36Sopenharmony_ci	int index;
80362306a36Sopenharmony_ci
80462306a36Sopenharmony_ci	/* Find and allocate region from the SPI bitmap */
80562306a36Sopenharmony_ci	mutex_lock(&chip_data->map_lock);
80662306a36Sopenharmony_ci	index = bitmap_find_free_region(chip_data->spi_map,
80762306a36Sopenharmony_ci					HV_PCI_MSI_SPI_NR,
80862306a36Sopenharmony_ci					get_count_order(nr_irqs));
80962306a36Sopenharmony_ci	mutex_unlock(&chip_data->map_lock);
81062306a36Sopenharmony_ci	if (index < 0)
81162306a36Sopenharmony_ci		return -ENOSPC;
81262306a36Sopenharmony_ci
81362306a36Sopenharmony_ci	*hwirq = index + HV_PCI_MSI_SPI_START;
81462306a36Sopenharmony_ci
81562306a36Sopenharmony_ci	return 0;
81662306a36Sopenharmony_ci}
81762306a36Sopenharmony_ci
81862306a36Sopenharmony_cistatic int hv_pci_vec_irq_gic_domain_alloc(struct irq_domain *domain,
81962306a36Sopenharmony_ci					   unsigned int virq,
82062306a36Sopenharmony_ci					   irq_hw_number_t hwirq)
82162306a36Sopenharmony_ci{
82262306a36Sopenharmony_ci	struct irq_fwspec fwspec;
82362306a36Sopenharmony_ci	struct irq_data *d;
82462306a36Sopenharmony_ci	int ret;
82562306a36Sopenharmony_ci
82662306a36Sopenharmony_ci	fwspec.fwnode = domain->parent->fwnode;
82762306a36Sopenharmony_ci	fwspec.param_count = 2;
82862306a36Sopenharmony_ci	fwspec.param[0] = hwirq;
82962306a36Sopenharmony_ci	fwspec.param[1] = IRQ_TYPE_EDGE_RISING;
83062306a36Sopenharmony_ci
83162306a36Sopenharmony_ci	ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec);
83262306a36Sopenharmony_ci	if (ret)
83362306a36Sopenharmony_ci		return ret;
83462306a36Sopenharmony_ci
83562306a36Sopenharmony_ci	/*
83662306a36Sopenharmony_ci	 * Since the interrupt specifier is not coming from ACPI or DT, the
83762306a36Sopenharmony_ci	 * trigger type will need to be set explicitly. Otherwise, it will be
83862306a36Sopenharmony_ci	 * set to whatever is in the GIC configuration.
83962306a36Sopenharmony_ci	 */
84062306a36Sopenharmony_ci	d = irq_domain_get_irq_data(domain->parent, virq);
84162306a36Sopenharmony_ci
84262306a36Sopenharmony_ci	return d->chip->irq_set_type(d, IRQ_TYPE_EDGE_RISING);
84362306a36Sopenharmony_ci}
84462306a36Sopenharmony_ci
84562306a36Sopenharmony_cistatic int hv_pci_vec_irq_domain_alloc(struct irq_domain *domain,
84662306a36Sopenharmony_ci				       unsigned int virq, unsigned int nr_irqs,
84762306a36Sopenharmony_ci				       void *args)
84862306a36Sopenharmony_ci{
84962306a36Sopenharmony_ci	irq_hw_number_t hwirq;
85062306a36Sopenharmony_ci	unsigned int i;
85162306a36Sopenharmony_ci	int ret;
85262306a36Sopenharmony_ci
85362306a36Sopenharmony_ci	ret = hv_pci_vec_alloc_device_irq(domain, nr_irqs, &hwirq);
85462306a36Sopenharmony_ci	if (ret)
85562306a36Sopenharmony_ci		return ret;
85662306a36Sopenharmony_ci
85762306a36Sopenharmony_ci	for (i = 0; i < nr_irqs; i++) {
85862306a36Sopenharmony_ci		ret = hv_pci_vec_irq_gic_domain_alloc(domain, virq + i,
85962306a36Sopenharmony_ci						      hwirq + i);
86062306a36Sopenharmony_ci		if (ret) {
86162306a36Sopenharmony_ci			hv_pci_vec_irq_free(domain, virq, nr_irqs, i);
86262306a36Sopenharmony_ci			return ret;
86362306a36Sopenharmony_ci		}
86462306a36Sopenharmony_ci
86562306a36Sopenharmony_ci		irq_domain_set_hwirq_and_chip(domain, virq + i,
86662306a36Sopenharmony_ci					      hwirq + i,
86762306a36Sopenharmony_ci					      &hv_arm64_msi_irq_chip,
86862306a36Sopenharmony_ci					      domain->host_data);
86962306a36Sopenharmony_ci		pr_debug("pID:%d vID:%u\n", (int)(hwirq + i), virq + i);
87062306a36Sopenharmony_ci	}
87162306a36Sopenharmony_ci
87262306a36Sopenharmony_ci	return 0;
87362306a36Sopenharmony_ci}
87462306a36Sopenharmony_ci
87562306a36Sopenharmony_ci/*
87662306a36Sopenharmony_ci * Pick the first cpu as the irq affinity that can be temporarily used for
87762306a36Sopenharmony_ci * composing MSI from the hypervisor. GIC will eventually set the right
87862306a36Sopenharmony_ci * affinity for the irq and the 'unmask' will retarget the interrupt to that
87962306a36Sopenharmony_ci * cpu.
88062306a36Sopenharmony_ci */
88162306a36Sopenharmony_cistatic int hv_pci_vec_irq_domain_activate(struct irq_domain *domain,
88262306a36Sopenharmony_ci					  struct irq_data *irqd, bool reserve)
88362306a36Sopenharmony_ci{
88462306a36Sopenharmony_ci	int cpu = cpumask_first(cpu_present_mask);
88562306a36Sopenharmony_ci
88662306a36Sopenharmony_ci	irq_data_update_effective_affinity(irqd, cpumask_of(cpu));
88762306a36Sopenharmony_ci
88862306a36Sopenharmony_ci	return 0;
88962306a36Sopenharmony_ci}
89062306a36Sopenharmony_ci
89162306a36Sopenharmony_cistatic const struct irq_domain_ops hv_pci_domain_ops = {
89262306a36Sopenharmony_ci	.alloc	= hv_pci_vec_irq_domain_alloc,
89362306a36Sopenharmony_ci	.free	= hv_pci_vec_irq_domain_free,
89462306a36Sopenharmony_ci	.activate = hv_pci_vec_irq_domain_activate,
89562306a36Sopenharmony_ci};
89662306a36Sopenharmony_ci
89762306a36Sopenharmony_cistatic int hv_pci_irqchip_init(void)
89862306a36Sopenharmony_ci{
89962306a36Sopenharmony_ci	static struct hv_pci_chip_data *chip_data;
90062306a36Sopenharmony_ci	struct fwnode_handle *fn = NULL;
90162306a36Sopenharmony_ci	int ret = -ENOMEM;
90262306a36Sopenharmony_ci
90362306a36Sopenharmony_ci	chip_data = kzalloc(sizeof(*chip_data), GFP_KERNEL);
90462306a36Sopenharmony_ci	if (!chip_data)
90562306a36Sopenharmony_ci		return ret;
90662306a36Sopenharmony_ci
90762306a36Sopenharmony_ci	mutex_init(&chip_data->map_lock);
90862306a36Sopenharmony_ci	fn = irq_domain_alloc_named_fwnode("hv_vpci_arm64");
90962306a36Sopenharmony_ci	if (!fn)
91062306a36Sopenharmony_ci		goto free_chip;
91162306a36Sopenharmony_ci
91262306a36Sopenharmony_ci	/*
91362306a36Sopenharmony_ci	 * IRQ domain once enabled, should not be removed since there is no
91462306a36Sopenharmony_ci	 * way to ensure that all the corresponding devices are also gone and
91562306a36Sopenharmony_ci	 * no interrupts will be generated.
91662306a36Sopenharmony_ci	 */
91762306a36Sopenharmony_ci	hv_msi_gic_irq_domain = acpi_irq_create_hierarchy(0, HV_PCI_MSI_SPI_NR,
91862306a36Sopenharmony_ci							  fn, &hv_pci_domain_ops,
91962306a36Sopenharmony_ci							  chip_data);
92062306a36Sopenharmony_ci
92162306a36Sopenharmony_ci	if (!hv_msi_gic_irq_domain) {
92262306a36Sopenharmony_ci		pr_err("Failed to create Hyper-V arm64 vPCI MSI IRQ domain\n");
92362306a36Sopenharmony_ci		goto free_chip;
92462306a36Sopenharmony_ci	}
92562306a36Sopenharmony_ci
92662306a36Sopenharmony_ci	return 0;
92762306a36Sopenharmony_ci
92862306a36Sopenharmony_cifree_chip:
92962306a36Sopenharmony_ci	kfree(chip_data);
93062306a36Sopenharmony_ci	if (fn)
93162306a36Sopenharmony_ci		irq_domain_free_fwnode(fn);
93262306a36Sopenharmony_ci
93362306a36Sopenharmony_ci	return ret;
93462306a36Sopenharmony_ci}
93562306a36Sopenharmony_ci
93662306a36Sopenharmony_cistatic struct irq_domain *hv_pci_get_root_domain(void)
93762306a36Sopenharmony_ci{
93862306a36Sopenharmony_ci	return hv_msi_gic_irq_domain;
93962306a36Sopenharmony_ci}
94062306a36Sopenharmony_ci
/*
 * SPIs are used for interrupts of PCI devices, and SPIs are managed via GICD
 * registers which Hyper-V already supports, so no hypercall is needed and
 * this hook is intentionally empty.
 */
static void hv_arch_irq_unmask(struct irq_data *data)
{
}
94662306a36Sopenharmony_ci#endif /* CONFIG_ARM64 */
94762306a36Sopenharmony_ci
94862306a36Sopenharmony_ci/**
94962306a36Sopenharmony_ci * hv_pci_generic_compl() - Invoked for a completion packet
95062306a36Sopenharmony_ci * @context:		Set up by the sender of the packet.
95162306a36Sopenharmony_ci * @resp:		The response packet
95262306a36Sopenharmony_ci * @resp_packet_size:	Size in bytes of the packet
95362306a36Sopenharmony_ci *
95462306a36Sopenharmony_ci * This function is used to trigger an event and report status
95562306a36Sopenharmony_ci * for any message for which the completion packet contains a
95662306a36Sopenharmony_ci * status and nothing else.
95762306a36Sopenharmony_ci */
95862306a36Sopenharmony_cistatic void hv_pci_generic_compl(void *context, struct pci_response *resp,
95962306a36Sopenharmony_ci				 int resp_packet_size)
96062306a36Sopenharmony_ci{
96162306a36Sopenharmony_ci	struct hv_pci_compl *comp_pkt = context;
96262306a36Sopenharmony_ci
96362306a36Sopenharmony_ci	comp_pkt->completion_status = resp->status;
96462306a36Sopenharmony_ci	complete(&comp_pkt->host_event);
96562306a36Sopenharmony_ci}
96662306a36Sopenharmony_ci
96762306a36Sopenharmony_cistatic struct hv_pci_dev *get_pcichild_wslot(struct hv_pcibus_device *hbus,
96862306a36Sopenharmony_ci						u32 wslot);
96962306a36Sopenharmony_ci
97062306a36Sopenharmony_cistatic void get_pcichild(struct hv_pci_dev *hpdev)
97162306a36Sopenharmony_ci{
97262306a36Sopenharmony_ci	refcount_inc(&hpdev->refs);
97362306a36Sopenharmony_ci}
97462306a36Sopenharmony_ci
97562306a36Sopenharmony_cistatic void put_pcichild(struct hv_pci_dev *hpdev)
97662306a36Sopenharmony_ci{
97762306a36Sopenharmony_ci	if (refcount_dec_and_test(&hpdev->refs))
97862306a36Sopenharmony_ci		kfree(hpdev);
97962306a36Sopenharmony_ci}
98062306a36Sopenharmony_ci
98162306a36Sopenharmony_ci/*
98262306a36Sopenharmony_ci * There is no good way to get notified from vmbus_onoffer_rescind(),
98362306a36Sopenharmony_ci * so let's use polling here, since this is not a hot path.
98462306a36Sopenharmony_ci */
98562306a36Sopenharmony_cistatic int wait_for_response(struct hv_device *hdev,
98662306a36Sopenharmony_ci			     struct completion *comp)
98762306a36Sopenharmony_ci{
98862306a36Sopenharmony_ci	while (true) {
98962306a36Sopenharmony_ci		if (hdev->channel->rescind) {
99062306a36Sopenharmony_ci			dev_warn_once(&hdev->device, "The device is gone.\n");
99162306a36Sopenharmony_ci			return -ENODEV;
99262306a36Sopenharmony_ci		}
99362306a36Sopenharmony_ci
99462306a36Sopenharmony_ci		if (wait_for_completion_timeout(comp, HZ / 10))
99562306a36Sopenharmony_ci			break;
99662306a36Sopenharmony_ci	}
99762306a36Sopenharmony_ci
99862306a36Sopenharmony_ci	return 0;
99962306a36Sopenharmony_ci}
100062306a36Sopenharmony_ci
100162306a36Sopenharmony_ci/**
100262306a36Sopenharmony_ci * devfn_to_wslot() - Convert from Linux PCI slot to Windows
100362306a36Sopenharmony_ci * @devfn:	The Linux representation of PCI slot
100462306a36Sopenharmony_ci *
100562306a36Sopenharmony_ci * Windows uses a slightly different representation of PCI slot.
100662306a36Sopenharmony_ci *
100762306a36Sopenharmony_ci * Return: The Windows representation
100862306a36Sopenharmony_ci */
100962306a36Sopenharmony_cistatic u32 devfn_to_wslot(int devfn)
101062306a36Sopenharmony_ci{
101162306a36Sopenharmony_ci	union win_slot_encoding wslot;
101262306a36Sopenharmony_ci
101362306a36Sopenharmony_ci	wslot.slot = 0;
101462306a36Sopenharmony_ci	wslot.bits.dev = PCI_SLOT(devfn);
101562306a36Sopenharmony_ci	wslot.bits.func = PCI_FUNC(devfn);
101662306a36Sopenharmony_ci
101762306a36Sopenharmony_ci	return wslot.slot;
101862306a36Sopenharmony_ci}
101962306a36Sopenharmony_ci
102062306a36Sopenharmony_ci/**
102162306a36Sopenharmony_ci * wslot_to_devfn() - Convert from Windows PCI slot to Linux
102262306a36Sopenharmony_ci * @wslot:	The Windows representation of PCI slot
102362306a36Sopenharmony_ci *
102462306a36Sopenharmony_ci * Windows uses a slightly different representation of PCI slot.
102562306a36Sopenharmony_ci *
102662306a36Sopenharmony_ci * Return: The Linux representation
102762306a36Sopenharmony_ci */
102862306a36Sopenharmony_cistatic int wslot_to_devfn(u32 wslot)
102962306a36Sopenharmony_ci{
103062306a36Sopenharmony_ci	union win_slot_encoding slot_no;
103162306a36Sopenharmony_ci
103262306a36Sopenharmony_ci	slot_no.slot = wslot;
103362306a36Sopenharmony_ci	return PCI_DEVFN(slot_no.bits.dev, slot_no.bits.func);
103462306a36Sopenharmony_ci}
103562306a36Sopenharmony_ci
103662306a36Sopenharmony_cistatic void hv_pci_read_mmio(struct device *dev, phys_addr_t gpa, int size, u32 *val)
103762306a36Sopenharmony_ci{
103862306a36Sopenharmony_ci	struct hv_mmio_read_input *in;
103962306a36Sopenharmony_ci	struct hv_mmio_read_output *out;
104062306a36Sopenharmony_ci	u64 ret;
104162306a36Sopenharmony_ci
104262306a36Sopenharmony_ci	/*
104362306a36Sopenharmony_ci	 * Must be called with interrupts disabled so it is safe
104462306a36Sopenharmony_ci	 * to use the per-cpu input argument page.  Use it for
104562306a36Sopenharmony_ci	 * both input and output.
104662306a36Sopenharmony_ci	 */
104762306a36Sopenharmony_ci	in = *this_cpu_ptr(hyperv_pcpu_input_arg);
104862306a36Sopenharmony_ci	out = *this_cpu_ptr(hyperv_pcpu_input_arg) + sizeof(*in);
104962306a36Sopenharmony_ci	in->gpa = gpa;
105062306a36Sopenharmony_ci	in->size = size;
105162306a36Sopenharmony_ci
105262306a36Sopenharmony_ci	ret = hv_do_hypercall(HVCALL_MMIO_READ, in, out);
105362306a36Sopenharmony_ci	if (hv_result_success(ret)) {
105462306a36Sopenharmony_ci		switch (size) {
105562306a36Sopenharmony_ci		case 1:
105662306a36Sopenharmony_ci			*val = *(u8 *)(out->data);
105762306a36Sopenharmony_ci			break;
105862306a36Sopenharmony_ci		case 2:
105962306a36Sopenharmony_ci			*val = *(u16 *)(out->data);
106062306a36Sopenharmony_ci			break;
106162306a36Sopenharmony_ci		default:
106262306a36Sopenharmony_ci			*val = *(u32 *)(out->data);
106362306a36Sopenharmony_ci			break;
106462306a36Sopenharmony_ci		}
106562306a36Sopenharmony_ci	} else
106662306a36Sopenharmony_ci		dev_err(dev, "MMIO read hypercall error %llx addr %llx size %d\n",
106762306a36Sopenharmony_ci				ret, gpa, size);
106862306a36Sopenharmony_ci}
106962306a36Sopenharmony_ci
107062306a36Sopenharmony_cistatic void hv_pci_write_mmio(struct device *dev, phys_addr_t gpa, int size, u32 val)
107162306a36Sopenharmony_ci{
107262306a36Sopenharmony_ci	struct hv_mmio_write_input *in;
107362306a36Sopenharmony_ci	u64 ret;
107462306a36Sopenharmony_ci
107562306a36Sopenharmony_ci	/*
107662306a36Sopenharmony_ci	 * Must be called with interrupts disabled so it is safe
107762306a36Sopenharmony_ci	 * to use the per-cpu input argument memory.
107862306a36Sopenharmony_ci	 */
107962306a36Sopenharmony_ci	in = *this_cpu_ptr(hyperv_pcpu_input_arg);
108062306a36Sopenharmony_ci	in->gpa = gpa;
108162306a36Sopenharmony_ci	in->size = size;
108262306a36Sopenharmony_ci	switch (size) {
108362306a36Sopenharmony_ci	case 1:
108462306a36Sopenharmony_ci		*(u8 *)(in->data) = val;
108562306a36Sopenharmony_ci		break;
108662306a36Sopenharmony_ci	case 2:
108762306a36Sopenharmony_ci		*(u16 *)(in->data) = val;
108862306a36Sopenharmony_ci		break;
108962306a36Sopenharmony_ci	default:
109062306a36Sopenharmony_ci		*(u32 *)(in->data) = val;
109162306a36Sopenharmony_ci		break;
109262306a36Sopenharmony_ci	}
109362306a36Sopenharmony_ci
109462306a36Sopenharmony_ci	ret = hv_do_hypercall(HVCALL_MMIO_WRITE, in, NULL);
109562306a36Sopenharmony_ci	if (!hv_result_success(ret))
109662306a36Sopenharmony_ci		dev_err(dev, "MMIO write hypercall error %llx addr %llx size %d\n",
109762306a36Sopenharmony_ci				ret, gpa, size);
109862306a36Sopenharmony_ci}
109962306a36Sopenharmony_ci
110062306a36Sopenharmony_ci/*
110162306a36Sopenharmony_ci * PCI Configuration Space for these root PCI buses is implemented as a pair
110262306a36Sopenharmony_ci * of pages in memory-mapped I/O space.  Writing to the first page chooses
110362306a36Sopenharmony_ci * the PCI function being written or read.  Once the first page has been
110462306a36Sopenharmony_ci * written to, the following page maps in the entire configuration space of
110562306a36Sopenharmony_ci * the function.
110662306a36Sopenharmony_ci */
110762306a36Sopenharmony_ci
110862306a36Sopenharmony_ci/**
110962306a36Sopenharmony_ci * _hv_pcifront_read_config() - Internal PCI config read
111062306a36Sopenharmony_ci * @hpdev:	The PCI driver's representation of the device
111162306a36Sopenharmony_ci * @where:	Offset within config space
111262306a36Sopenharmony_ci * @size:	Size of the transfer
111362306a36Sopenharmony_ci * @val:	Pointer to the buffer receiving the data
111462306a36Sopenharmony_ci */
111562306a36Sopenharmony_cistatic void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where,
111662306a36Sopenharmony_ci				     int size, u32 *val)
111762306a36Sopenharmony_ci{
111862306a36Sopenharmony_ci	struct hv_pcibus_device *hbus = hpdev->hbus;
111962306a36Sopenharmony_ci	struct device *dev = &hbus->hdev->device;
112062306a36Sopenharmony_ci	int offset = where + CFG_PAGE_OFFSET;
112162306a36Sopenharmony_ci	unsigned long flags;
112262306a36Sopenharmony_ci
112362306a36Sopenharmony_ci	/*
112462306a36Sopenharmony_ci	 * If the attempt is to read the IDs or the ROM BAR, simulate that.
112562306a36Sopenharmony_ci	 */
112662306a36Sopenharmony_ci	if (where + size <= PCI_COMMAND) {
112762306a36Sopenharmony_ci		memcpy(val, ((u8 *)&hpdev->desc.v_id) + where, size);
112862306a36Sopenharmony_ci	} else if (where >= PCI_CLASS_REVISION && where + size <=
112962306a36Sopenharmony_ci		   PCI_CACHE_LINE_SIZE) {
113062306a36Sopenharmony_ci		memcpy(val, ((u8 *)&hpdev->desc.rev) + where -
113162306a36Sopenharmony_ci		       PCI_CLASS_REVISION, size);
113262306a36Sopenharmony_ci	} else if (where >= PCI_SUBSYSTEM_VENDOR_ID && where + size <=
113362306a36Sopenharmony_ci		   PCI_ROM_ADDRESS) {
113462306a36Sopenharmony_ci		memcpy(val, (u8 *)&hpdev->desc.subsystem_id + where -
113562306a36Sopenharmony_ci		       PCI_SUBSYSTEM_VENDOR_ID, size);
113662306a36Sopenharmony_ci	} else if (where >= PCI_ROM_ADDRESS && where + size <=
113762306a36Sopenharmony_ci		   PCI_CAPABILITY_LIST) {
113862306a36Sopenharmony_ci		/* ROM BARs are unimplemented */
113962306a36Sopenharmony_ci		*val = 0;
114062306a36Sopenharmony_ci	} else if (where >= PCI_INTERRUPT_LINE && where + size <=
114162306a36Sopenharmony_ci		   PCI_INTERRUPT_PIN) {
114262306a36Sopenharmony_ci		/*
114362306a36Sopenharmony_ci		 * Interrupt Line and Interrupt PIN are hard-wired to zero
114462306a36Sopenharmony_ci		 * because this front-end only supports message-signaled
114562306a36Sopenharmony_ci		 * interrupts.
114662306a36Sopenharmony_ci		 */
114762306a36Sopenharmony_ci		*val = 0;
114862306a36Sopenharmony_ci	} else if (where + size <= CFG_PAGE_SIZE) {
114962306a36Sopenharmony_ci
115062306a36Sopenharmony_ci		spin_lock_irqsave(&hbus->config_lock, flags);
115162306a36Sopenharmony_ci		if (hbus->use_calls) {
115262306a36Sopenharmony_ci			phys_addr_t addr = hbus->mem_config->start + offset;
115362306a36Sopenharmony_ci
115462306a36Sopenharmony_ci			hv_pci_write_mmio(dev, hbus->mem_config->start, 4,
115562306a36Sopenharmony_ci						hpdev->desc.win_slot.slot);
115662306a36Sopenharmony_ci			hv_pci_read_mmio(dev, addr, size, val);
115762306a36Sopenharmony_ci		} else {
115862306a36Sopenharmony_ci			void __iomem *addr = hbus->cfg_addr + offset;
115962306a36Sopenharmony_ci
116062306a36Sopenharmony_ci			/* Choose the function to be read. (See comment above) */
116162306a36Sopenharmony_ci			writel(hpdev->desc.win_slot.slot, hbus->cfg_addr);
116262306a36Sopenharmony_ci			/* Make sure the function was chosen before reading. */
116362306a36Sopenharmony_ci			mb();
116462306a36Sopenharmony_ci			/* Read from that function's config space. */
116562306a36Sopenharmony_ci			switch (size) {
116662306a36Sopenharmony_ci			case 1:
116762306a36Sopenharmony_ci				*val = readb(addr);
116862306a36Sopenharmony_ci				break;
116962306a36Sopenharmony_ci			case 2:
117062306a36Sopenharmony_ci				*val = readw(addr);
117162306a36Sopenharmony_ci				break;
117262306a36Sopenharmony_ci			default:
117362306a36Sopenharmony_ci				*val = readl(addr);
117462306a36Sopenharmony_ci				break;
117562306a36Sopenharmony_ci			}
117662306a36Sopenharmony_ci			/*
117762306a36Sopenharmony_ci			 * Make sure the read was done before we release the
117862306a36Sopenharmony_ci			 * spinlock allowing consecutive reads/writes.
117962306a36Sopenharmony_ci			 */
118062306a36Sopenharmony_ci			mb();
118162306a36Sopenharmony_ci		}
118262306a36Sopenharmony_ci		spin_unlock_irqrestore(&hbus->config_lock, flags);
118362306a36Sopenharmony_ci	} else {
118462306a36Sopenharmony_ci		dev_err(dev, "Attempt to read beyond a function's config space.\n");
118562306a36Sopenharmony_ci	}
118662306a36Sopenharmony_ci}
118762306a36Sopenharmony_ci
118862306a36Sopenharmony_cistatic u16 hv_pcifront_get_vendor_id(struct hv_pci_dev *hpdev)
118962306a36Sopenharmony_ci{
119062306a36Sopenharmony_ci	struct hv_pcibus_device *hbus = hpdev->hbus;
119162306a36Sopenharmony_ci	struct device *dev = &hbus->hdev->device;
119262306a36Sopenharmony_ci	u32 val;
119362306a36Sopenharmony_ci	u16 ret;
119462306a36Sopenharmony_ci	unsigned long flags;
119562306a36Sopenharmony_ci
119662306a36Sopenharmony_ci	spin_lock_irqsave(&hbus->config_lock, flags);
119762306a36Sopenharmony_ci
119862306a36Sopenharmony_ci	if (hbus->use_calls) {
119962306a36Sopenharmony_ci		phys_addr_t addr = hbus->mem_config->start +
120062306a36Sopenharmony_ci					 CFG_PAGE_OFFSET + PCI_VENDOR_ID;
120162306a36Sopenharmony_ci
120262306a36Sopenharmony_ci		hv_pci_write_mmio(dev, hbus->mem_config->start, 4,
120362306a36Sopenharmony_ci					hpdev->desc.win_slot.slot);
120462306a36Sopenharmony_ci		hv_pci_read_mmio(dev, addr, 2, &val);
120562306a36Sopenharmony_ci		ret = val;  /* Truncates to 16 bits */
120662306a36Sopenharmony_ci	} else {
120762306a36Sopenharmony_ci		void __iomem *addr = hbus->cfg_addr + CFG_PAGE_OFFSET +
120862306a36Sopenharmony_ci					     PCI_VENDOR_ID;
120962306a36Sopenharmony_ci		/* Choose the function to be read. (See comment above) */
121062306a36Sopenharmony_ci		writel(hpdev->desc.win_slot.slot, hbus->cfg_addr);
121162306a36Sopenharmony_ci		/* Make sure the function was chosen before we start reading. */
121262306a36Sopenharmony_ci		mb();
121362306a36Sopenharmony_ci		/* Read from that function's config space. */
121462306a36Sopenharmony_ci		ret = readw(addr);
121562306a36Sopenharmony_ci		/*
121662306a36Sopenharmony_ci		 * mb() is not required here, because the
121762306a36Sopenharmony_ci		 * spin_unlock_irqrestore() is a barrier.
121862306a36Sopenharmony_ci		 */
121962306a36Sopenharmony_ci	}
122062306a36Sopenharmony_ci
122162306a36Sopenharmony_ci	spin_unlock_irqrestore(&hbus->config_lock, flags);
122262306a36Sopenharmony_ci
122362306a36Sopenharmony_ci	return ret;
122462306a36Sopenharmony_ci}
122562306a36Sopenharmony_ci
122662306a36Sopenharmony_ci/**
122762306a36Sopenharmony_ci * _hv_pcifront_write_config() - Internal PCI config write
122862306a36Sopenharmony_ci * @hpdev:	The PCI driver's representation of the device
122962306a36Sopenharmony_ci * @where:	Offset within config space
123062306a36Sopenharmony_ci * @size:	Size of the transfer
123162306a36Sopenharmony_ci * @val:	The data being transferred
123262306a36Sopenharmony_ci */
123362306a36Sopenharmony_cistatic void _hv_pcifront_write_config(struct hv_pci_dev *hpdev, int where,
123462306a36Sopenharmony_ci				      int size, u32 val)
123562306a36Sopenharmony_ci{
123662306a36Sopenharmony_ci	struct hv_pcibus_device *hbus = hpdev->hbus;
123762306a36Sopenharmony_ci	struct device *dev = &hbus->hdev->device;
123862306a36Sopenharmony_ci	int offset = where + CFG_PAGE_OFFSET;
123962306a36Sopenharmony_ci	unsigned long flags;
124062306a36Sopenharmony_ci
124162306a36Sopenharmony_ci	if (where >= PCI_SUBSYSTEM_VENDOR_ID &&
124262306a36Sopenharmony_ci	    where + size <= PCI_CAPABILITY_LIST) {
124362306a36Sopenharmony_ci		/* SSIDs and ROM BARs are read-only */
124462306a36Sopenharmony_ci	} else if (where >= PCI_COMMAND && where + size <= CFG_PAGE_SIZE) {
124562306a36Sopenharmony_ci		spin_lock_irqsave(&hbus->config_lock, flags);
124662306a36Sopenharmony_ci
124762306a36Sopenharmony_ci		if (hbus->use_calls) {
124862306a36Sopenharmony_ci			phys_addr_t addr = hbus->mem_config->start + offset;
124962306a36Sopenharmony_ci
125062306a36Sopenharmony_ci			hv_pci_write_mmio(dev, hbus->mem_config->start, 4,
125162306a36Sopenharmony_ci						hpdev->desc.win_slot.slot);
125262306a36Sopenharmony_ci			hv_pci_write_mmio(dev, addr, size, val);
125362306a36Sopenharmony_ci		} else {
125462306a36Sopenharmony_ci			void __iomem *addr = hbus->cfg_addr + offset;
125562306a36Sopenharmony_ci
125662306a36Sopenharmony_ci			/* Choose the function to write. (See comment above) */
125762306a36Sopenharmony_ci			writel(hpdev->desc.win_slot.slot, hbus->cfg_addr);
125862306a36Sopenharmony_ci			/* Make sure the function was chosen before writing. */
125962306a36Sopenharmony_ci			wmb();
126062306a36Sopenharmony_ci			/* Write to that function's config space. */
126162306a36Sopenharmony_ci			switch (size) {
126262306a36Sopenharmony_ci			case 1:
126362306a36Sopenharmony_ci				writeb(val, addr);
126462306a36Sopenharmony_ci				break;
126562306a36Sopenharmony_ci			case 2:
126662306a36Sopenharmony_ci				writew(val, addr);
126762306a36Sopenharmony_ci				break;
126862306a36Sopenharmony_ci			default:
126962306a36Sopenharmony_ci				writel(val, addr);
127062306a36Sopenharmony_ci				break;
127162306a36Sopenharmony_ci			}
127262306a36Sopenharmony_ci			/*
127362306a36Sopenharmony_ci			 * Make sure the write was done before we release the
127462306a36Sopenharmony_ci			 * spinlock allowing consecutive reads/writes.
127562306a36Sopenharmony_ci			 */
127662306a36Sopenharmony_ci			mb();
127762306a36Sopenharmony_ci		}
127862306a36Sopenharmony_ci		spin_unlock_irqrestore(&hbus->config_lock, flags);
127962306a36Sopenharmony_ci	} else {
128062306a36Sopenharmony_ci		dev_err(dev, "Attempt to write beyond a function's config space.\n");
128162306a36Sopenharmony_ci	}
128262306a36Sopenharmony_ci}
128362306a36Sopenharmony_ci
128462306a36Sopenharmony_ci/**
128562306a36Sopenharmony_ci * hv_pcifront_read_config() - Read configuration space
128662306a36Sopenharmony_ci * @bus: PCI Bus structure
128762306a36Sopenharmony_ci * @devfn: Device/function
128862306a36Sopenharmony_ci * @where: Offset from base
128962306a36Sopenharmony_ci * @size: Byte/word/dword
129062306a36Sopenharmony_ci * @val: Value to be read
129162306a36Sopenharmony_ci *
129262306a36Sopenharmony_ci * Return: PCIBIOS_SUCCESSFUL on success
129362306a36Sopenharmony_ci *	   PCIBIOS_DEVICE_NOT_FOUND on failure
129462306a36Sopenharmony_ci */
129562306a36Sopenharmony_cistatic int hv_pcifront_read_config(struct pci_bus *bus, unsigned int devfn,
129662306a36Sopenharmony_ci				   int where, int size, u32 *val)
129762306a36Sopenharmony_ci{
129862306a36Sopenharmony_ci	struct hv_pcibus_device *hbus =
129962306a36Sopenharmony_ci		container_of(bus->sysdata, struct hv_pcibus_device, sysdata);
130062306a36Sopenharmony_ci	struct hv_pci_dev *hpdev;
130162306a36Sopenharmony_ci
130262306a36Sopenharmony_ci	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(devfn));
130362306a36Sopenharmony_ci	if (!hpdev)
130462306a36Sopenharmony_ci		return PCIBIOS_DEVICE_NOT_FOUND;
130562306a36Sopenharmony_ci
130662306a36Sopenharmony_ci	_hv_pcifront_read_config(hpdev, where, size, val);
130762306a36Sopenharmony_ci
130862306a36Sopenharmony_ci	put_pcichild(hpdev);
130962306a36Sopenharmony_ci	return PCIBIOS_SUCCESSFUL;
131062306a36Sopenharmony_ci}
131162306a36Sopenharmony_ci
131262306a36Sopenharmony_ci/**
131362306a36Sopenharmony_ci * hv_pcifront_write_config() - Write configuration space
131462306a36Sopenharmony_ci * @bus: PCI Bus structure
131562306a36Sopenharmony_ci * @devfn: Device/function
131662306a36Sopenharmony_ci * @where: Offset from base
131762306a36Sopenharmony_ci * @size: Byte/word/dword
131862306a36Sopenharmony_ci * @val: Value to be written to device
131962306a36Sopenharmony_ci *
132062306a36Sopenharmony_ci * Return: PCIBIOS_SUCCESSFUL on success
132162306a36Sopenharmony_ci *	   PCIBIOS_DEVICE_NOT_FOUND on failure
132262306a36Sopenharmony_ci */
132362306a36Sopenharmony_cistatic int hv_pcifront_write_config(struct pci_bus *bus, unsigned int devfn,
132462306a36Sopenharmony_ci				    int where, int size, u32 val)
132562306a36Sopenharmony_ci{
132662306a36Sopenharmony_ci	struct hv_pcibus_device *hbus =
132762306a36Sopenharmony_ci	    container_of(bus->sysdata, struct hv_pcibus_device, sysdata);
132862306a36Sopenharmony_ci	struct hv_pci_dev *hpdev;
132962306a36Sopenharmony_ci
133062306a36Sopenharmony_ci	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(devfn));
133162306a36Sopenharmony_ci	if (!hpdev)
133262306a36Sopenharmony_ci		return PCIBIOS_DEVICE_NOT_FOUND;
133362306a36Sopenharmony_ci
133462306a36Sopenharmony_ci	_hv_pcifront_write_config(hpdev, where, size, val);
133562306a36Sopenharmony_ci
133662306a36Sopenharmony_ci	put_pcichild(hpdev);
133762306a36Sopenharmony_ci	return PCIBIOS_SUCCESSFUL;
133862306a36Sopenharmony_ci}
133962306a36Sopenharmony_ci
/*
 * PCIe operations: the config-space read and write accessors implemented
 * by this driver.
 */
static struct pci_ops hv_pcifront_ops = {
	.read  = hv_pcifront_read_config,
	.write = hv_pcifront_write_config,
};
134562306a36Sopenharmony_ci
134662306a36Sopenharmony_ci/*
134762306a36Sopenharmony_ci * Paravirtual backchannel
134862306a36Sopenharmony_ci *
134962306a36Sopenharmony_ci * Hyper-V SR-IOV provides a backchannel mechanism in software for
135062306a36Sopenharmony_ci * communication between a VF driver and a PF driver.  These
135162306a36Sopenharmony_ci * "configuration blocks" are similar in concept to PCI configuration space,
135262306a36Sopenharmony_ci * but instead of doing reads and writes in 32-bit chunks through a very slow
135362306a36Sopenharmony_ci * path, packets of up to 128 bytes can be sent or received asynchronously.
135462306a36Sopenharmony_ci *
135562306a36Sopenharmony_ci * Nearly every SR-IOV device contains just such a communications channel in
135662306a36Sopenharmony_ci * hardware, so using this one in software is usually optional.  Using the
135762306a36Sopenharmony_ci * software channel, however, allows driver implementers to leverage software
135862306a36Sopenharmony_ci * tools that fuzz the communications channel looking for vulnerabilities.
135962306a36Sopenharmony_ci *
136062306a36Sopenharmony_ci * The usage model for these packets puts the responsibility for reading or
136162306a36Sopenharmony_ci * writing on the VF driver.  The VF driver sends a read or a write packet,
136262306a36Sopenharmony_ci * indicating which "block" is being referred to by number.
136362306a36Sopenharmony_ci *
 * If the PF driver wishes to initiate communication, it can "invalidate" one or
 * more of the first 64 blocks.  This invalidation is delivered via a callback
 * supplied to this driver by the VF driver.
136762306a36Sopenharmony_ci *
136862306a36Sopenharmony_ci * No protocol is implied, except that supplied by the PF and VF drivers.
136962306a36Sopenharmony_ci */
137062306a36Sopenharmony_ci
/*
 * Completion context for a read config block request.  A pointer to this
 * structure is what hv_pci_read_config_compl() receives as its context.
 */
struct hv_read_config_compl {
	/* Completion event and status reported for the request. */
	struct hv_pci_compl comp_pkt;
	/* Destination the returned block data is copied into. */
	void *buf;
	/* Capacity of buf in bytes; the copy never exceeds this. */
	unsigned int len;
	/* Number of bytes actually copied into buf. */
	unsigned int bytes_returned;
};
137762306a36Sopenharmony_ci
137862306a36Sopenharmony_ci/**
137962306a36Sopenharmony_ci * hv_pci_read_config_compl() - Invoked when a response packet
138062306a36Sopenharmony_ci * for a read config block operation arrives.
138162306a36Sopenharmony_ci * @context:		Identifies the read config operation
138262306a36Sopenharmony_ci * @resp:		The response packet itself
138362306a36Sopenharmony_ci * @resp_packet_size:	Size in bytes of the response packet
138462306a36Sopenharmony_ci */
138562306a36Sopenharmony_cistatic void hv_pci_read_config_compl(void *context, struct pci_response *resp,
138662306a36Sopenharmony_ci				     int resp_packet_size)
138762306a36Sopenharmony_ci{
138862306a36Sopenharmony_ci	struct hv_read_config_compl *comp = context;
138962306a36Sopenharmony_ci	struct pci_read_block_response *read_resp =
139062306a36Sopenharmony_ci		(struct pci_read_block_response *)resp;
139162306a36Sopenharmony_ci	unsigned int data_len, hdr_len;
139262306a36Sopenharmony_ci
139362306a36Sopenharmony_ci	hdr_len = offsetof(struct pci_read_block_response, bytes);
139462306a36Sopenharmony_ci	if (resp_packet_size < hdr_len) {
139562306a36Sopenharmony_ci		comp->comp_pkt.completion_status = -1;
139662306a36Sopenharmony_ci		goto out;
139762306a36Sopenharmony_ci	}
139862306a36Sopenharmony_ci
139962306a36Sopenharmony_ci	data_len = resp_packet_size - hdr_len;
140062306a36Sopenharmony_ci	if (data_len > 0 && read_resp->status == 0) {
140162306a36Sopenharmony_ci		comp->bytes_returned = min(comp->len, data_len);
140262306a36Sopenharmony_ci		memcpy(comp->buf, read_resp->bytes, comp->bytes_returned);
140362306a36Sopenharmony_ci	} else {
140462306a36Sopenharmony_ci		comp->bytes_returned = 0;
140562306a36Sopenharmony_ci	}
140662306a36Sopenharmony_ci
140762306a36Sopenharmony_ci	comp->comp_pkt.completion_status = read_resp->status;
140862306a36Sopenharmony_ciout:
140962306a36Sopenharmony_ci	complete(&comp->comp_pkt.host_event);
141062306a36Sopenharmony_ci}
141162306a36Sopenharmony_ci
141262306a36Sopenharmony_ci/**
141362306a36Sopenharmony_ci * hv_read_config_block() - Sends a read config block request to
141462306a36Sopenharmony_ci * the back-end driver running in the Hyper-V parent partition.
141562306a36Sopenharmony_ci * @pdev:		The PCI driver's representation for this device.
141662306a36Sopenharmony_ci * @buf:		Buffer into which the config block will be copied.
141762306a36Sopenharmony_ci * @len:		Size in bytes of buf.
141862306a36Sopenharmony_ci * @block_id:		Identifies the config block which has been requested.
141962306a36Sopenharmony_ci * @bytes_returned:	Size which came back from the back-end driver.
142062306a36Sopenharmony_ci *
142162306a36Sopenharmony_ci * Return: 0 on success, -errno on failure
142262306a36Sopenharmony_ci */
142362306a36Sopenharmony_cistatic int hv_read_config_block(struct pci_dev *pdev, void *buf,
142462306a36Sopenharmony_ci				unsigned int len, unsigned int block_id,
142562306a36Sopenharmony_ci				unsigned int *bytes_returned)
142662306a36Sopenharmony_ci{
142762306a36Sopenharmony_ci	struct hv_pcibus_device *hbus =
142862306a36Sopenharmony_ci		container_of(pdev->bus->sysdata, struct hv_pcibus_device,
142962306a36Sopenharmony_ci			     sysdata);
143062306a36Sopenharmony_ci	struct {
143162306a36Sopenharmony_ci		struct pci_packet pkt;
143262306a36Sopenharmony_ci		char buf[sizeof(struct pci_read_block)];
143362306a36Sopenharmony_ci	} pkt;
143462306a36Sopenharmony_ci	struct hv_read_config_compl comp_pkt;
143562306a36Sopenharmony_ci	struct pci_read_block *read_blk;
143662306a36Sopenharmony_ci	int ret;
143762306a36Sopenharmony_ci
143862306a36Sopenharmony_ci	if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX)
143962306a36Sopenharmony_ci		return -EINVAL;
144062306a36Sopenharmony_ci
144162306a36Sopenharmony_ci	init_completion(&comp_pkt.comp_pkt.host_event);
144262306a36Sopenharmony_ci	comp_pkt.buf = buf;
144362306a36Sopenharmony_ci	comp_pkt.len = len;
144462306a36Sopenharmony_ci
144562306a36Sopenharmony_ci	memset(&pkt, 0, sizeof(pkt));
144662306a36Sopenharmony_ci	pkt.pkt.completion_func = hv_pci_read_config_compl;
144762306a36Sopenharmony_ci	pkt.pkt.compl_ctxt = &comp_pkt;
144862306a36Sopenharmony_ci	read_blk = (struct pci_read_block *)&pkt.pkt.message;
144962306a36Sopenharmony_ci	read_blk->message_type.type = PCI_READ_BLOCK;
145062306a36Sopenharmony_ci	read_blk->wslot.slot = devfn_to_wslot(pdev->devfn);
145162306a36Sopenharmony_ci	read_blk->block_id = block_id;
145262306a36Sopenharmony_ci	read_blk->bytes_requested = len;
145362306a36Sopenharmony_ci
145462306a36Sopenharmony_ci	ret = vmbus_sendpacket(hbus->hdev->channel, read_blk,
145562306a36Sopenharmony_ci			       sizeof(*read_blk), (unsigned long)&pkt.pkt,
145662306a36Sopenharmony_ci			       VM_PKT_DATA_INBAND,
145762306a36Sopenharmony_ci			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
145862306a36Sopenharmony_ci	if (ret)
145962306a36Sopenharmony_ci		return ret;
146062306a36Sopenharmony_ci
146162306a36Sopenharmony_ci	ret = wait_for_response(hbus->hdev, &comp_pkt.comp_pkt.host_event);
146262306a36Sopenharmony_ci	if (ret)
146362306a36Sopenharmony_ci		return ret;
146462306a36Sopenharmony_ci
146562306a36Sopenharmony_ci	if (comp_pkt.comp_pkt.completion_status != 0 ||
146662306a36Sopenharmony_ci	    comp_pkt.bytes_returned == 0) {
146762306a36Sopenharmony_ci		dev_err(&hbus->hdev->device,
146862306a36Sopenharmony_ci			"Read Config Block failed: 0x%x, bytes_returned=%d\n",
146962306a36Sopenharmony_ci			comp_pkt.comp_pkt.completion_status,
147062306a36Sopenharmony_ci			comp_pkt.bytes_returned);
147162306a36Sopenharmony_ci		return -EIO;
147262306a36Sopenharmony_ci	}
147362306a36Sopenharmony_ci
147462306a36Sopenharmony_ci	*bytes_returned = comp_pkt.bytes_returned;
147562306a36Sopenharmony_ci	return 0;
147662306a36Sopenharmony_ci}
147762306a36Sopenharmony_ci
147862306a36Sopenharmony_ci/**
147962306a36Sopenharmony_ci * hv_pci_write_config_compl() - Invoked when a response packet for a write
148062306a36Sopenharmony_ci * config block operation arrives.
148162306a36Sopenharmony_ci * @context:		Identifies the write config operation
148262306a36Sopenharmony_ci * @resp:		The response packet itself
148362306a36Sopenharmony_ci * @resp_packet_size:	Size in bytes of the response packet
148462306a36Sopenharmony_ci */
148562306a36Sopenharmony_cistatic void hv_pci_write_config_compl(void *context, struct pci_response *resp,
148662306a36Sopenharmony_ci				      int resp_packet_size)
148762306a36Sopenharmony_ci{
148862306a36Sopenharmony_ci	struct hv_pci_compl *comp_pkt = context;
148962306a36Sopenharmony_ci
149062306a36Sopenharmony_ci	comp_pkt->completion_status = resp->status;
149162306a36Sopenharmony_ci	complete(&comp_pkt->host_event);
149262306a36Sopenharmony_ci}
149362306a36Sopenharmony_ci
149462306a36Sopenharmony_ci/**
149562306a36Sopenharmony_ci * hv_write_config_block() - Sends a write config block request to the
149662306a36Sopenharmony_ci * back-end driver running in the Hyper-V parent partition.
149762306a36Sopenharmony_ci * @pdev:		The PCI driver's representation for this device.
149862306a36Sopenharmony_ci * @buf:		Buffer from which the config block will	be copied.
149962306a36Sopenharmony_ci * @len:		Size in bytes of buf.
150062306a36Sopenharmony_ci * @block_id:		Identifies the config block which is being written.
150162306a36Sopenharmony_ci *
150262306a36Sopenharmony_ci * Return: 0 on success, -errno on failure
150362306a36Sopenharmony_ci */
150462306a36Sopenharmony_cistatic int hv_write_config_block(struct pci_dev *pdev, void *buf,
150562306a36Sopenharmony_ci				unsigned int len, unsigned int block_id)
150662306a36Sopenharmony_ci{
150762306a36Sopenharmony_ci	struct hv_pcibus_device *hbus =
150862306a36Sopenharmony_ci		container_of(pdev->bus->sysdata, struct hv_pcibus_device,
150962306a36Sopenharmony_ci			     sysdata);
151062306a36Sopenharmony_ci	struct {
151162306a36Sopenharmony_ci		struct pci_packet pkt;
151262306a36Sopenharmony_ci		char buf[sizeof(struct pci_write_block)];
151362306a36Sopenharmony_ci		u32 reserved;
151462306a36Sopenharmony_ci	} pkt;
151562306a36Sopenharmony_ci	struct hv_pci_compl comp_pkt;
151662306a36Sopenharmony_ci	struct pci_write_block *write_blk;
151762306a36Sopenharmony_ci	u32 pkt_size;
151862306a36Sopenharmony_ci	int ret;
151962306a36Sopenharmony_ci
152062306a36Sopenharmony_ci	if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX)
152162306a36Sopenharmony_ci		return -EINVAL;
152262306a36Sopenharmony_ci
152362306a36Sopenharmony_ci	init_completion(&comp_pkt.host_event);
152462306a36Sopenharmony_ci
152562306a36Sopenharmony_ci	memset(&pkt, 0, sizeof(pkt));
152662306a36Sopenharmony_ci	pkt.pkt.completion_func = hv_pci_write_config_compl;
152762306a36Sopenharmony_ci	pkt.pkt.compl_ctxt = &comp_pkt;
152862306a36Sopenharmony_ci	write_blk = (struct pci_write_block *)&pkt.pkt.message;
152962306a36Sopenharmony_ci	write_blk->message_type.type = PCI_WRITE_BLOCK;
153062306a36Sopenharmony_ci	write_blk->wslot.slot = devfn_to_wslot(pdev->devfn);
153162306a36Sopenharmony_ci	write_blk->block_id = block_id;
153262306a36Sopenharmony_ci	write_blk->byte_count = len;
153362306a36Sopenharmony_ci	memcpy(write_blk->bytes, buf, len);
153462306a36Sopenharmony_ci	pkt_size = offsetof(struct pci_write_block, bytes) + len;
153562306a36Sopenharmony_ci	/*
153662306a36Sopenharmony_ci	 * This quirk is required on some hosts shipped around 2018, because
153762306a36Sopenharmony_ci	 * these hosts don't check the pkt_size correctly (new hosts have been
153862306a36Sopenharmony_ci	 * fixed since early 2019). The quirk is also safe on very old hosts
153962306a36Sopenharmony_ci	 * and new hosts, because, on them, what really matters is the length
154062306a36Sopenharmony_ci	 * specified in write_blk->byte_count.
154162306a36Sopenharmony_ci	 */
154262306a36Sopenharmony_ci	pkt_size += sizeof(pkt.reserved);
154362306a36Sopenharmony_ci
154462306a36Sopenharmony_ci	ret = vmbus_sendpacket(hbus->hdev->channel, write_blk, pkt_size,
154562306a36Sopenharmony_ci			       (unsigned long)&pkt.pkt, VM_PKT_DATA_INBAND,
154662306a36Sopenharmony_ci			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
154762306a36Sopenharmony_ci	if (ret)
154862306a36Sopenharmony_ci		return ret;
154962306a36Sopenharmony_ci
155062306a36Sopenharmony_ci	ret = wait_for_response(hbus->hdev, &comp_pkt.host_event);
155162306a36Sopenharmony_ci	if (ret)
155262306a36Sopenharmony_ci		return ret;
155362306a36Sopenharmony_ci
155462306a36Sopenharmony_ci	if (comp_pkt.completion_status != 0) {
155562306a36Sopenharmony_ci		dev_err(&hbus->hdev->device,
155662306a36Sopenharmony_ci			"Write Config Block failed: 0x%x\n",
155762306a36Sopenharmony_ci			comp_pkt.completion_status);
155862306a36Sopenharmony_ci		return -EIO;
155962306a36Sopenharmony_ci	}
156062306a36Sopenharmony_ci
156162306a36Sopenharmony_ci	return 0;
156262306a36Sopenharmony_ci}
156362306a36Sopenharmony_ci
156462306a36Sopenharmony_ci/**
156562306a36Sopenharmony_ci * hv_register_block_invalidate() - Invoked when a config block invalidation
156662306a36Sopenharmony_ci * arrives from the back-end driver.
156762306a36Sopenharmony_ci * @pdev:		The PCI driver's representation for this device.
156862306a36Sopenharmony_ci * @context:		Identifies the device.
156962306a36Sopenharmony_ci * @block_invalidate:	Identifies all of the blocks being invalidated.
157062306a36Sopenharmony_ci *
157162306a36Sopenharmony_ci * Return: 0 on success, -errno on failure
157262306a36Sopenharmony_ci */
157362306a36Sopenharmony_cistatic int hv_register_block_invalidate(struct pci_dev *pdev, void *context,
157462306a36Sopenharmony_ci					void (*block_invalidate)(void *context,
157562306a36Sopenharmony_ci								 u64 block_mask))
157662306a36Sopenharmony_ci{
157762306a36Sopenharmony_ci	struct hv_pcibus_device *hbus =
157862306a36Sopenharmony_ci		container_of(pdev->bus->sysdata, struct hv_pcibus_device,
157962306a36Sopenharmony_ci			     sysdata);
158062306a36Sopenharmony_ci	struct hv_pci_dev *hpdev;
158162306a36Sopenharmony_ci
158262306a36Sopenharmony_ci	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
158362306a36Sopenharmony_ci	if (!hpdev)
158462306a36Sopenharmony_ci		return -ENODEV;
158562306a36Sopenharmony_ci
158662306a36Sopenharmony_ci	hpdev->block_invalidate = block_invalidate;
158762306a36Sopenharmony_ci	hpdev->invalidate_context = context;
158862306a36Sopenharmony_ci
158962306a36Sopenharmony_ci	put_pcichild(hpdev);
159062306a36Sopenharmony_ci	return 0;
159162306a36Sopenharmony_ci
159262306a36Sopenharmony_ci}
159362306a36Sopenharmony_ci
159462306a36Sopenharmony_ci/* Interrupt management hooks */
159562306a36Sopenharmony_cistatic void hv_int_desc_free(struct hv_pci_dev *hpdev,
159662306a36Sopenharmony_ci			     struct tran_int_desc *int_desc)
159762306a36Sopenharmony_ci{
159862306a36Sopenharmony_ci	struct pci_delete_interrupt *int_pkt;
159962306a36Sopenharmony_ci	struct {
160062306a36Sopenharmony_ci		struct pci_packet pkt;
160162306a36Sopenharmony_ci		u8 buffer[sizeof(struct pci_delete_interrupt)];
160262306a36Sopenharmony_ci	} ctxt;
160362306a36Sopenharmony_ci
160462306a36Sopenharmony_ci	if (!int_desc->vector_count) {
160562306a36Sopenharmony_ci		kfree(int_desc);
160662306a36Sopenharmony_ci		return;
160762306a36Sopenharmony_ci	}
160862306a36Sopenharmony_ci	memset(&ctxt, 0, sizeof(ctxt));
160962306a36Sopenharmony_ci	int_pkt = (struct pci_delete_interrupt *)&ctxt.pkt.message;
161062306a36Sopenharmony_ci	int_pkt->message_type.type =
161162306a36Sopenharmony_ci		PCI_DELETE_INTERRUPT_MESSAGE;
161262306a36Sopenharmony_ci	int_pkt->wslot.slot = hpdev->desc.win_slot.slot;
161362306a36Sopenharmony_ci	int_pkt->int_desc = *int_desc;
161462306a36Sopenharmony_ci	vmbus_sendpacket(hpdev->hbus->hdev->channel, int_pkt, sizeof(*int_pkt),
161562306a36Sopenharmony_ci			 0, VM_PKT_DATA_INBAND, 0);
161662306a36Sopenharmony_ci	kfree(int_desc);
161762306a36Sopenharmony_ci}
161862306a36Sopenharmony_ci
161962306a36Sopenharmony_ci/**
162062306a36Sopenharmony_ci * hv_msi_free() - Free the MSI.
162162306a36Sopenharmony_ci * @domain:	The interrupt domain pointer
162262306a36Sopenharmony_ci * @info:	Extra MSI-related context
162362306a36Sopenharmony_ci * @irq:	Identifies the IRQ.
162462306a36Sopenharmony_ci *
162562306a36Sopenharmony_ci * The Hyper-V parent partition and hypervisor are tracking the
162662306a36Sopenharmony_ci * messages that are in use, keeping the interrupt redirection
162762306a36Sopenharmony_ci * table up to date.  This callback sends a message that frees
162862306a36Sopenharmony_ci * the IRT entry and related tracking nonsense.
162962306a36Sopenharmony_ci */
163062306a36Sopenharmony_cistatic void hv_msi_free(struct irq_domain *domain, struct msi_domain_info *info,
163162306a36Sopenharmony_ci			unsigned int irq)
163262306a36Sopenharmony_ci{
163362306a36Sopenharmony_ci	struct hv_pcibus_device *hbus;
163462306a36Sopenharmony_ci	struct hv_pci_dev *hpdev;
163562306a36Sopenharmony_ci	struct pci_dev *pdev;
163662306a36Sopenharmony_ci	struct tran_int_desc *int_desc;
163762306a36Sopenharmony_ci	struct irq_data *irq_data = irq_domain_get_irq_data(domain, irq);
163862306a36Sopenharmony_ci	struct msi_desc *msi = irq_data_get_msi_desc(irq_data);
163962306a36Sopenharmony_ci
164062306a36Sopenharmony_ci	pdev = msi_desc_to_pci_dev(msi);
164162306a36Sopenharmony_ci	hbus = info->data;
164262306a36Sopenharmony_ci	int_desc = irq_data_get_irq_chip_data(irq_data);
164362306a36Sopenharmony_ci	if (!int_desc)
164462306a36Sopenharmony_ci		return;
164562306a36Sopenharmony_ci
164662306a36Sopenharmony_ci	irq_data->chip_data = NULL;
164762306a36Sopenharmony_ci	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
164862306a36Sopenharmony_ci	if (!hpdev) {
164962306a36Sopenharmony_ci		kfree(int_desc);
165062306a36Sopenharmony_ci		return;
165162306a36Sopenharmony_ci	}
165262306a36Sopenharmony_ci
165362306a36Sopenharmony_ci	hv_int_desc_free(hpdev, int_desc);
165462306a36Sopenharmony_ci	put_pcichild(hpdev);
165562306a36Sopenharmony_ci}
165662306a36Sopenharmony_ci
165762306a36Sopenharmony_cistatic void hv_irq_mask(struct irq_data *data)
165862306a36Sopenharmony_ci{
165962306a36Sopenharmony_ci	pci_msi_mask_irq(data);
166062306a36Sopenharmony_ci	if (data->parent_data->chip->irq_mask)
166162306a36Sopenharmony_ci		irq_chip_mask_parent(data);
166262306a36Sopenharmony_ci}
166362306a36Sopenharmony_ci
166462306a36Sopenharmony_cistatic void hv_irq_unmask(struct irq_data *data)
166562306a36Sopenharmony_ci{
166662306a36Sopenharmony_ci	hv_arch_irq_unmask(data);
166762306a36Sopenharmony_ci
166862306a36Sopenharmony_ci	if (data->parent_data->chip->irq_unmask)
166962306a36Sopenharmony_ci		irq_chip_unmask_parent(data);
167062306a36Sopenharmony_ci	pci_msi_unmask_irq(data);
167162306a36Sopenharmony_ci}
167262306a36Sopenharmony_ci
/*
 * Completion context for a create-interrupt request.  A pointer to this
 * structure is what hv_pci_compose_compl() receives as its context.
 */
struct compose_comp_ctxt {
	/* Completion event and status reported for the request. */
	struct hv_pci_compl comp_pkt;
	/* Interrupt descriptor copied out of the response packet. */
	struct tran_int_desc int_desc;
};
167762306a36Sopenharmony_ci
167862306a36Sopenharmony_cistatic void hv_pci_compose_compl(void *context, struct pci_response *resp,
167962306a36Sopenharmony_ci				 int resp_packet_size)
168062306a36Sopenharmony_ci{
168162306a36Sopenharmony_ci	struct compose_comp_ctxt *comp_pkt = context;
168262306a36Sopenharmony_ci	struct pci_create_int_response *int_resp =
168362306a36Sopenharmony_ci		(struct pci_create_int_response *)resp;
168462306a36Sopenharmony_ci
168562306a36Sopenharmony_ci	if (resp_packet_size < sizeof(*int_resp)) {
168662306a36Sopenharmony_ci		comp_pkt->comp_pkt.completion_status = -1;
168762306a36Sopenharmony_ci		goto out;
168862306a36Sopenharmony_ci	}
168962306a36Sopenharmony_ci	comp_pkt->comp_pkt.completion_status = resp->status;
169062306a36Sopenharmony_ci	comp_pkt->int_desc = int_resp->int_desc;
169162306a36Sopenharmony_ciout:
169262306a36Sopenharmony_ci	complete(&comp_pkt->comp_pkt.host_event);
169362306a36Sopenharmony_ci}
169462306a36Sopenharmony_ci
169562306a36Sopenharmony_cistatic u32 hv_compose_msi_req_v1(
169662306a36Sopenharmony_ci	struct pci_create_interrupt *int_pkt,
169762306a36Sopenharmony_ci	u32 slot, u8 vector, u16 vector_count)
169862306a36Sopenharmony_ci{
169962306a36Sopenharmony_ci	int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE;
170062306a36Sopenharmony_ci	int_pkt->wslot.slot = slot;
170162306a36Sopenharmony_ci	int_pkt->int_desc.vector = vector;
170262306a36Sopenharmony_ci	int_pkt->int_desc.vector_count = vector_count;
170362306a36Sopenharmony_ci	int_pkt->int_desc.delivery_mode = DELIVERY_MODE;
170462306a36Sopenharmony_ci
170562306a36Sopenharmony_ci	/*
170662306a36Sopenharmony_ci	 * Create MSI w/ dummy vCPU set, overwritten by subsequent retarget in
170762306a36Sopenharmony_ci	 * hv_irq_unmask().
170862306a36Sopenharmony_ci	 */
170962306a36Sopenharmony_ci	int_pkt->int_desc.cpu_mask = CPU_AFFINITY_ALL;
171062306a36Sopenharmony_ci
171162306a36Sopenharmony_ci	return sizeof(*int_pkt);
171262306a36Sopenharmony_ci}
171362306a36Sopenharmony_ci
171462306a36Sopenharmony_ci/*
171562306a36Sopenharmony_ci * The vCPU selected by hv_compose_multi_msi_req_get_cpu() and
171662306a36Sopenharmony_ci * hv_compose_msi_req_get_cpu() is a "dummy" vCPU because the final vCPU to be
171762306a36Sopenharmony_ci * interrupted is specified later in hv_irq_unmask() and communicated to Hyper-V
171862306a36Sopenharmony_ci * via the HVCALL_RETARGET_INTERRUPT hypercall. But the choice of dummy vCPU is
171962306a36Sopenharmony_ci * not irrelevant because Hyper-V chooses the physical CPU to handle the
172062306a36Sopenharmony_ci * interrupts based on the vCPU specified in message sent to the vPCI VSP in
172162306a36Sopenharmony_ci * hv_compose_msi_msg(). Hyper-V's choice of pCPU is not visible to the guest,
172262306a36Sopenharmony_ci * but assigning too many vPCI device interrupts to the same pCPU can cause a
172362306a36Sopenharmony_ci * performance bottleneck. So we spread out the dummy vCPUs to influence Hyper-V
172462306a36Sopenharmony_ci * to spread out the pCPUs that it selects.
172562306a36Sopenharmony_ci *
172662306a36Sopenharmony_ci * For the single-MSI and MSI-X cases, it's OK for hv_compose_msi_req_get_cpu()
172762306a36Sopenharmony_ci * to always return the same dummy vCPU, because a second call to
172862306a36Sopenharmony_ci * hv_compose_msi_msg() contains the "real" vCPU, causing Hyper-V to choose a
172962306a36Sopenharmony_ci * new pCPU for the interrupt. But for the multi-MSI case, the second call to
173062306a36Sopenharmony_ci * hv_compose_msi_msg() exits without sending a message to the vPCI VSP, so the
173162306a36Sopenharmony_ci * original dummy vCPU is used. This dummy vCPU must be round-robin'ed so that
173262306a36Sopenharmony_ci * the pCPUs are spread out. All interrupts for a multi-MSI device end up using
173362306a36Sopenharmony_ci * the same pCPU, even though the vCPUs will be spread out by later calls
173462306a36Sopenharmony_ci * to hv_irq_unmask(), but that is the best we can do now.
173562306a36Sopenharmony_ci *
173662306a36Sopenharmony_ci * With Hyper-V in Nov 2022, the HVCALL_RETARGET_INTERRUPT hypercall does *not*
173762306a36Sopenharmony_ci * cause Hyper-V to reselect the pCPU based on the specified vCPU. Such an
173862306a36Sopenharmony_ci * enhancement is planned for a future version. With that enhancement, the
173962306a36Sopenharmony_ci * dummy vCPU selection won't matter, and interrupts for the same multi-MSI
174062306a36Sopenharmony_ci * device will be spread across multiple pCPUs.
174162306a36Sopenharmony_ci */
174262306a36Sopenharmony_ci
174362306a36Sopenharmony_ci/*
174462306a36Sopenharmony_ci * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten
174562306a36Sopenharmony_ci * by subsequent retarget in hv_irq_unmask().
174662306a36Sopenharmony_ci */
174762306a36Sopenharmony_cistatic int hv_compose_msi_req_get_cpu(const struct cpumask *affinity)
174862306a36Sopenharmony_ci{
174962306a36Sopenharmony_ci	return cpumask_first_and(affinity, cpu_online_mask);
175062306a36Sopenharmony_ci}
175162306a36Sopenharmony_ci
175262306a36Sopenharmony_ci/*
175362306a36Sopenharmony_ci * Make sure the dummy vCPU values for multi-MSI don't all point to vCPU0.
175462306a36Sopenharmony_ci */
175562306a36Sopenharmony_cistatic int hv_compose_multi_msi_req_get_cpu(void)
175662306a36Sopenharmony_ci{
175762306a36Sopenharmony_ci	static DEFINE_SPINLOCK(multi_msi_cpu_lock);
175862306a36Sopenharmony_ci
175962306a36Sopenharmony_ci	/* -1 means starting with CPU 0 */
176062306a36Sopenharmony_ci	static int cpu_next = -1;
176162306a36Sopenharmony_ci
176262306a36Sopenharmony_ci	unsigned long flags;
176362306a36Sopenharmony_ci	int cpu;
176462306a36Sopenharmony_ci
176562306a36Sopenharmony_ci	spin_lock_irqsave(&multi_msi_cpu_lock, flags);
176662306a36Sopenharmony_ci
176762306a36Sopenharmony_ci	cpu_next = cpumask_next_wrap(cpu_next, cpu_online_mask, nr_cpu_ids,
176862306a36Sopenharmony_ci				     false);
176962306a36Sopenharmony_ci	cpu = cpu_next;
177062306a36Sopenharmony_ci
177162306a36Sopenharmony_ci	spin_unlock_irqrestore(&multi_msi_cpu_lock, flags);
177262306a36Sopenharmony_ci
177362306a36Sopenharmony_ci	return cpu;
177462306a36Sopenharmony_ci}
177562306a36Sopenharmony_ci
177662306a36Sopenharmony_cistatic u32 hv_compose_msi_req_v2(
177762306a36Sopenharmony_ci	struct pci_create_interrupt2 *int_pkt, int cpu,
177862306a36Sopenharmony_ci	u32 slot, u8 vector, u16 vector_count)
177962306a36Sopenharmony_ci{
178062306a36Sopenharmony_ci	int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE2;
178162306a36Sopenharmony_ci	int_pkt->wslot.slot = slot;
178262306a36Sopenharmony_ci	int_pkt->int_desc.vector = vector;
178362306a36Sopenharmony_ci	int_pkt->int_desc.vector_count = vector_count;
178462306a36Sopenharmony_ci	int_pkt->int_desc.delivery_mode = DELIVERY_MODE;
178562306a36Sopenharmony_ci	int_pkt->int_desc.processor_array[0] =
178662306a36Sopenharmony_ci		hv_cpu_number_to_vp_number(cpu);
178762306a36Sopenharmony_ci	int_pkt->int_desc.processor_count = 1;
178862306a36Sopenharmony_ci
178962306a36Sopenharmony_ci	return sizeof(*int_pkt);
179062306a36Sopenharmony_ci}
179162306a36Sopenharmony_ci
179262306a36Sopenharmony_cistatic u32 hv_compose_msi_req_v3(
179362306a36Sopenharmony_ci	struct pci_create_interrupt3 *int_pkt, int cpu,
179462306a36Sopenharmony_ci	u32 slot, u32 vector, u16 vector_count)
179562306a36Sopenharmony_ci{
179662306a36Sopenharmony_ci	int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE3;
179762306a36Sopenharmony_ci	int_pkt->wslot.slot = slot;
179862306a36Sopenharmony_ci	int_pkt->int_desc.vector = vector;
179962306a36Sopenharmony_ci	int_pkt->int_desc.reserved = 0;
180062306a36Sopenharmony_ci	int_pkt->int_desc.vector_count = vector_count;
180162306a36Sopenharmony_ci	int_pkt->int_desc.delivery_mode = DELIVERY_MODE;
180262306a36Sopenharmony_ci	int_pkt->int_desc.processor_array[0] =
180362306a36Sopenharmony_ci		hv_cpu_number_to_vp_number(cpu);
180462306a36Sopenharmony_ci	int_pkt->int_desc.processor_count = 1;
180562306a36Sopenharmony_ci
180662306a36Sopenharmony_ci	return sizeof(*int_pkt);
180762306a36Sopenharmony_ci}
180862306a36Sopenharmony_ci
180962306a36Sopenharmony_ci/**
181062306a36Sopenharmony_ci * hv_compose_msi_msg() - Supplies a valid MSI address/data
181162306a36Sopenharmony_ci * @data:	Everything about this MSI
181262306a36Sopenharmony_ci * @msg:	Buffer that is filled in by this function
181362306a36Sopenharmony_ci *
181462306a36Sopenharmony_ci * This function unpacks the IRQ looking for target CPU set, IDT
181562306a36Sopenharmony_ci * vector and mode and sends a message to the parent partition
181662306a36Sopenharmony_ci * asking for a mapping for that tuple in this partition.  The
181762306a36Sopenharmony_ci * response supplies a data value and address to which that data
181862306a36Sopenharmony_ci * should be written to trigger that interrupt.
181962306a36Sopenharmony_ci */
182062306a36Sopenharmony_cistatic void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
182162306a36Sopenharmony_ci{
182262306a36Sopenharmony_ci	struct hv_pcibus_device *hbus;
182362306a36Sopenharmony_ci	struct vmbus_channel *channel;
182462306a36Sopenharmony_ci	struct hv_pci_dev *hpdev;
182562306a36Sopenharmony_ci	struct pci_bus *pbus;
182662306a36Sopenharmony_ci	struct pci_dev *pdev;
182762306a36Sopenharmony_ci	const struct cpumask *dest;
182862306a36Sopenharmony_ci	struct compose_comp_ctxt comp;
182962306a36Sopenharmony_ci	struct tran_int_desc *int_desc;
183062306a36Sopenharmony_ci	struct msi_desc *msi_desc;
183162306a36Sopenharmony_ci	/*
183262306a36Sopenharmony_ci	 * vector_count should be u16: see hv_msi_desc, hv_msi_desc2
183362306a36Sopenharmony_ci	 * and hv_msi_desc3. vector must be u32: see hv_msi_desc3.
183462306a36Sopenharmony_ci	 */
183562306a36Sopenharmony_ci	u16 vector_count;
183662306a36Sopenharmony_ci	u32 vector;
183762306a36Sopenharmony_ci	struct {
183862306a36Sopenharmony_ci		struct pci_packet pci_pkt;
183962306a36Sopenharmony_ci		union {
184062306a36Sopenharmony_ci			struct pci_create_interrupt v1;
184162306a36Sopenharmony_ci			struct pci_create_interrupt2 v2;
184262306a36Sopenharmony_ci			struct pci_create_interrupt3 v3;
184362306a36Sopenharmony_ci		} int_pkts;
184462306a36Sopenharmony_ci	} __packed ctxt;
184562306a36Sopenharmony_ci	bool multi_msi;
184662306a36Sopenharmony_ci	u64 trans_id;
184762306a36Sopenharmony_ci	u32 size;
184862306a36Sopenharmony_ci	int ret;
184962306a36Sopenharmony_ci	int cpu;
185062306a36Sopenharmony_ci
185162306a36Sopenharmony_ci	msi_desc  = irq_data_get_msi_desc(data);
185262306a36Sopenharmony_ci	multi_msi = !msi_desc->pci.msi_attrib.is_msix &&
185362306a36Sopenharmony_ci		    msi_desc->nvec_used > 1;
185462306a36Sopenharmony_ci
185562306a36Sopenharmony_ci	/* Reuse the previous allocation */
185662306a36Sopenharmony_ci	if (data->chip_data && multi_msi) {
185762306a36Sopenharmony_ci		int_desc = data->chip_data;
185862306a36Sopenharmony_ci		msg->address_hi = int_desc->address >> 32;
185962306a36Sopenharmony_ci		msg->address_lo = int_desc->address & 0xffffffff;
186062306a36Sopenharmony_ci		msg->data = int_desc->data;
186162306a36Sopenharmony_ci		return;
186262306a36Sopenharmony_ci	}
186362306a36Sopenharmony_ci
186462306a36Sopenharmony_ci	pdev = msi_desc_to_pci_dev(msi_desc);
186562306a36Sopenharmony_ci	dest = irq_data_get_effective_affinity_mask(data);
186662306a36Sopenharmony_ci	pbus = pdev->bus;
186762306a36Sopenharmony_ci	hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
186862306a36Sopenharmony_ci	channel = hbus->hdev->channel;
186962306a36Sopenharmony_ci	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
187062306a36Sopenharmony_ci	if (!hpdev)
187162306a36Sopenharmony_ci		goto return_null_message;
187262306a36Sopenharmony_ci
187362306a36Sopenharmony_ci	/* Free any previous message that might have already been composed. */
187462306a36Sopenharmony_ci	if (data->chip_data && !multi_msi) {
187562306a36Sopenharmony_ci		int_desc = data->chip_data;
187662306a36Sopenharmony_ci		data->chip_data = NULL;
187762306a36Sopenharmony_ci		hv_int_desc_free(hpdev, int_desc);
187862306a36Sopenharmony_ci	}
187962306a36Sopenharmony_ci
188062306a36Sopenharmony_ci	int_desc = kzalloc(sizeof(*int_desc), GFP_ATOMIC);
188162306a36Sopenharmony_ci	if (!int_desc)
188262306a36Sopenharmony_ci		goto drop_reference;
188362306a36Sopenharmony_ci
188462306a36Sopenharmony_ci	if (multi_msi) {
188562306a36Sopenharmony_ci		/*
188662306a36Sopenharmony_ci		 * If this is not the first MSI of Multi MSI, we already have
188762306a36Sopenharmony_ci		 * a mapping.  Can exit early.
188862306a36Sopenharmony_ci		 */
188962306a36Sopenharmony_ci		if (msi_desc->irq != data->irq) {
189062306a36Sopenharmony_ci			data->chip_data = int_desc;
189162306a36Sopenharmony_ci			int_desc->address = msi_desc->msg.address_lo |
189262306a36Sopenharmony_ci					    (u64)msi_desc->msg.address_hi << 32;
189362306a36Sopenharmony_ci			int_desc->data = msi_desc->msg.data +
189462306a36Sopenharmony_ci					 (data->irq - msi_desc->irq);
189562306a36Sopenharmony_ci			msg->address_hi = msi_desc->msg.address_hi;
189662306a36Sopenharmony_ci			msg->address_lo = msi_desc->msg.address_lo;
189762306a36Sopenharmony_ci			msg->data = int_desc->data;
189862306a36Sopenharmony_ci			put_pcichild(hpdev);
189962306a36Sopenharmony_ci			return;
190062306a36Sopenharmony_ci		}
190162306a36Sopenharmony_ci		/*
190262306a36Sopenharmony_ci		 * The vector we select here is a dummy value.  The correct
190362306a36Sopenharmony_ci		 * value gets sent to the hypervisor in unmask().  This needs
190462306a36Sopenharmony_ci		 * to be aligned with the count, and also not zero.  Multi-msi
190562306a36Sopenharmony_ci		 * is powers of 2 up to 32, so 32 will always work here.
190662306a36Sopenharmony_ci		 */
190762306a36Sopenharmony_ci		vector = 32;
190862306a36Sopenharmony_ci		vector_count = msi_desc->nvec_used;
190962306a36Sopenharmony_ci		cpu = hv_compose_multi_msi_req_get_cpu();
191062306a36Sopenharmony_ci	} else {
191162306a36Sopenharmony_ci		vector = hv_msi_get_int_vector(data);
191262306a36Sopenharmony_ci		vector_count = 1;
191362306a36Sopenharmony_ci		cpu = hv_compose_msi_req_get_cpu(dest);
191462306a36Sopenharmony_ci	}
191562306a36Sopenharmony_ci
191662306a36Sopenharmony_ci	/*
191762306a36Sopenharmony_ci	 * hv_compose_msi_req_v1 and v2 are for x86 only, meaning 'vector'
191862306a36Sopenharmony_ci	 * can't exceed u8. Cast 'vector' down to u8 for v1/v2 explicitly
191962306a36Sopenharmony_ci	 * for better readability.
192062306a36Sopenharmony_ci	 */
192162306a36Sopenharmony_ci	memset(&ctxt, 0, sizeof(ctxt));
192262306a36Sopenharmony_ci	init_completion(&comp.comp_pkt.host_event);
192362306a36Sopenharmony_ci	ctxt.pci_pkt.completion_func = hv_pci_compose_compl;
192462306a36Sopenharmony_ci	ctxt.pci_pkt.compl_ctxt = &comp;
192562306a36Sopenharmony_ci
192662306a36Sopenharmony_ci	switch (hbus->protocol_version) {
192762306a36Sopenharmony_ci	case PCI_PROTOCOL_VERSION_1_1:
192862306a36Sopenharmony_ci		size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1,
192962306a36Sopenharmony_ci					hpdev->desc.win_slot.slot,
193062306a36Sopenharmony_ci					(u8)vector,
193162306a36Sopenharmony_ci					vector_count);
193262306a36Sopenharmony_ci		break;
193362306a36Sopenharmony_ci
193462306a36Sopenharmony_ci	case PCI_PROTOCOL_VERSION_1_2:
193562306a36Sopenharmony_ci	case PCI_PROTOCOL_VERSION_1_3:
193662306a36Sopenharmony_ci		size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2,
193762306a36Sopenharmony_ci					cpu,
193862306a36Sopenharmony_ci					hpdev->desc.win_slot.slot,
193962306a36Sopenharmony_ci					(u8)vector,
194062306a36Sopenharmony_ci					vector_count);
194162306a36Sopenharmony_ci		break;
194262306a36Sopenharmony_ci
194362306a36Sopenharmony_ci	case PCI_PROTOCOL_VERSION_1_4:
194462306a36Sopenharmony_ci		size = hv_compose_msi_req_v3(&ctxt.int_pkts.v3,
194562306a36Sopenharmony_ci					cpu,
194662306a36Sopenharmony_ci					hpdev->desc.win_slot.slot,
194762306a36Sopenharmony_ci					vector,
194862306a36Sopenharmony_ci					vector_count);
194962306a36Sopenharmony_ci		break;
195062306a36Sopenharmony_ci
195162306a36Sopenharmony_ci	default:
195262306a36Sopenharmony_ci		/* As we only negotiate protocol versions known to this driver,
195362306a36Sopenharmony_ci		 * this path should never hit. However, this is it not a hot
195462306a36Sopenharmony_ci		 * path so we print a message to aid future updates.
195562306a36Sopenharmony_ci		 */
195662306a36Sopenharmony_ci		dev_err(&hbus->hdev->device,
195762306a36Sopenharmony_ci			"Unexpected vPCI protocol, update driver.");
195862306a36Sopenharmony_ci		goto free_int_desc;
195962306a36Sopenharmony_ci	}
196062306a36Sopenharmony_ci
196162306a36Sopenharmony_ci	ret = vmbus_sendpacket_getid(hpdev->hbus->hdev->channel, &ctxt.int_pkts,
196262306a36Sopenharmony_ci				     size, (unsigned long)&ctxt.pci_pkt,
196362306a36Sopenharmony_ci				     &trans_id, VM_PKT_DATA_INBAND,
196462306a36Sopenharmony_ci				     VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
196562306a36Sopenharmony_ci	if (ret) {
196662306a36Sopenharmony_ci		dev_err(&hbus->hdev->device,
196762306a36Sopenharmony_ci			"Sending request for interrupt failed: 0x%x",
196862306a36Sopenharmony_ci			comp.comp_pkt.completion_status);
196962306a36Sopenharmony_ci		goto free_int_desc;
197062306a36Sopenharmony_ci	}
197162306a36Sopenharmony_ci
197262306a36Sopenharmony_ci	/*
197362306a36Sopenharmony_ci	 * Prevents hv_pci_onchannelcallback() from running concurrently
197462306a36Sopenharmony_ci	 * in the tasklet.
197562306a36Sopenharmony_ci	 */
197662306a36Sopenharmony_ci	tasklet_disable_in_atomic(&channel->callback_event);
197762306a36Sopenharmony_ci
197862306a36Sopenharmony_ci	/*
197962306a36Sopenharmony_ci	 * Since this function is called with IRQ locks held, can't
198062306a36Sopenharmony_ci	 * do normal wait for completion; instead poll.
198162306a36Sopenharmony_ci	 */
198262306a36Sopenharmony_ci	while (!try_wait_for_completion(&comp.comp_pkt.host_event)) {
198362306a36Sopenharmony_ci		unsigned long flags;
198462306a36Sopenharmony_ci
198562306a36Sopenharmony_ci		/* 0xFFFF means an invalid PCI VENDOR ID. */
198662306a36Sopenharmony_ci		if (hv_pcifront_get_vendor_id(hpdev) == 0xFFFF) {
198762306a36Sopenharmony_ci			dev_err_once(&hbus->hdev->device,
198862306a36Sopenharmony_ci				     "the device has gone\n");
198962306a36Sopenharmony_ci			goto enable_tasklet;
199062306a36Sopenharmony_ci		}
199162306a36Sopenharmony_ci
199262306a36Sopenharmony_ci		/*
199362306a36Sopenharmony_ci		 * Make sure that the ring buffer data structure doesn't get
199462306a36Sopenharmony_ci		 * freed while we dereference the ring buffer pointer.  Test
199562306a36Sopenharmony_ci		 * for the channel's onchannel_callback being NULL within a
199662306a36Sopenharmony_ci		 * sched_lock critical section.  See also the inline comments
199762306a36Sopenharmony_ci		 * in vmbus_reset_channel_cb().
199862306a36Sopenharmony_ci		 */
199962306a36Sopenharmony_ci		spin_lock_irqsave(&channel->sched_lock, flags);
200062306a36Sopenharmony_ci		if (unlikely(channel->onchannel_callback == NULL)) {
200162306a36Sopenharmony_ci			spin_unlock_irqrestore(&channel->sched_lock, flags);
200262306a36Sopenharmony_ci			goto enable_tasklet;
200362306a36Sopenharmony_ci		}
200462306a36Sopenharmony_ci		hv_pci_onchannelcallback(hbus);
200562306a36Sopenharmony_ci		spin_unlock_irqrestore(&channel->sched_lock, flags);
200662306a36Sopenharmony_ci
200762306a36Sopenharmony_ci		udelay(100);
200862306a36Sopenharmony_ci	}
200962306a36Sopenharmony_ci
201062306a36Sopenharmony_ci	tasklet_enable(&channel->callback_event);
201162306a36Sopenharmony_ci
201262306a36Sopenharmony_ci	if (comp.comp_pkt.completion_status < 0) {
201362306a36Sopenharmony_ci		dev_err(&hbus->hdev->device,
201462306a36Sopenharmony_ci			"Request for interrupt failed: 0x%x",
201562306a36Sopenharmony_ci			comp.comp_pkt.completion_status);
201662306a36Sopenharmony_ci		goto free_int_desc;
201762306a36Sopenharmony_ci	}
201862306a36Sopenharmony_ci
201962306a36Sopenharmony_ci	/*
202062306a36Sopenharmony_ci	 * Record the assignment so that this can be unwound later. Using
202162306a36Sopenharmony_ci	 * irq_set_chip_data() here would be appropriate, but the lock it takes
202262306a36Sopenharmony_ci	 * is already held.
202362306a36Sopenharmony_ci	 */
202462306a36Sopenharmony_ci	*int_desc = comp.int_desc;
202562306a36Sopenharmony_ci	data->chip_data = int_desc;
202662306a36Sopenharmony_ci
202762306a36Sopenharmony_ci	/* Pass up the result. */
202862306a36Sopenharmony_ci	msg->address_hi = comp.int_desc.address >> 32;
202962306a36Sopenharmony_ci	msg->address_lo = comp.int_desc.address & 0xffffffff;
203062306a36Sopenharmony_ci	msg->data = comp.int_desc.data;
203162306a36Sopenharmony_ci
203262306a36Sopenharmony_ci	put_pcichild(hpdev);
203362306a36Sopenharmony_ci	return;
203462306a36Sopenharmony_ci
203562306a36Sopenharmony_cienable_tasklet:
203662306a36Sopenharmony_ci	tasklet_enable(&channel->callback_event);
203762306a36Sopenharmony_ci	/*
203862306a36Sopenharmony_ci	 * The completion packet on the stack becomes invalid after 'return';
203962306a36Sopenharmony_ci	 * remove the ID from the VMbus requestor if the identifier is still
204062306a36Sopenharmony_ci	 * mapped to/associated with the packet.  (The identifier could have
204162306a36Sopenharmony_ci	 * been 're-used', i.e., already removed and (re-)mapped.)
204262306a36Sopenharmony_ci	 *
204362306a36Sopenharmony_ci	 * Cf. hv_pci_onchannelcallback().
204462306a36Sopenharmony_ci	 */
204562306a36Sopenharmony_ci	vmbus_request_addr_match(channel, trans_id, (unsigned long)&ctxt.pci_pkt);
204662306a36Sopenharmony_cifree_int_desc:
204762306a36Sopenharmony_ci	kfree(int_desc);
204862306a36Sopenharmony_cidrop_reference:
204962306a36Sopenharmony_ci	put_pcichild(hpdev);
205062306a36Sopenharmony_cireturn_null_message:
205162306a36Sopenharmony_ci	msg->address_hi = 0;
205262306a36Sopenharmony_ci	msg->address_lo = 0;
205362306a36Sopenharmony_ci	msg->data = 0;
205462306a36Sopenharmony_ci}
205562306a36Sopenharmony_ci
205662306a36Sopenharmony_ci/* HW Interrupt Chip Descriptor */
205762306a36Sopenharmony_cistatic struct irq_chip hv_msi_irq_chip = {
205862306a36Sopenharmony_ci	.name			= "Hyper-V PCIe MSI",
205962306a36Sopenharmony_ci	.irq_compose_msi_msg	= hv_compose_msi_msg,
206062306a36Sopenharmony_ci	.irq_set_affinity	= irq_chip_set_affinity_parent,
206162306a36Sopenharmony_ci#ifdef CONFIG_X86
206262306a36Sopenharmony_ci	.irq_ack		= irq_chip_ack_parent,
206362306a36Sopenharmony_ci#elif defined(CONFIG_ARM64)
206462306a36Sopenharmony_ci	.irq_eoi		= irq_chip_eoi_parent,
206562306a36Sopenharmony_ci#endif
206662306a36Sopenharmony_ci	.irq_mask		= hv_irq_mask,
206762306a36Sopenharmony_ci	.irq_unmask		= hv_irq_unmask,
206862306a36Sopenharmony_ci};
206962306a36Sopenharmony_ci
207062306a36Sopenharmony_cistatic struct msi_domain_ops hv_msi_ops = {
207162306a36Sopenharmony_ci	.msi_prepare	= hv_msi_prepare,
207262306a36Sopenharmony_ci	.msi_free	= hv_msi_free,
207362306a36Sopenharmony_ci};
207462306a36Sopenharmony_ci
207562306a36Sopenharmony_ci/**
207662306a36Sopenharmony_ci * hv_pcie_init_irq_domain() - Initialize IRQ domain
207762306a36Sopenharmony_ci * @hbus:	The root PCI bus
207862306a36Sopenharmony_ci *
207962306a36Sopenharmony_ci * This function creates an IRQ domain which will be used for
208062306a36Sopenharmony_ci * interrupts from devices that have been passed through.  These
208162306a36Sopenharmony_ci * devices only support MSI and MSI-X, not line-based interrupts
208262306a36Sopenharmony_ci * or simulations of line-based interrupts through PCIe's
208362306a36Sopenharmony_ci * fabric-layer messages.  Because interrupts are remapped, we
208462306a36Sopenharmony_ci * can support multi-message MSI here.
208562306a36Sopenharmony_ci *
208662306a36Sopenharmony_ci * Return: '0' on success and error value on failure
208762306a36Sopenharmony_ci */
208862306a36Sopenharmony_cistatic int hv_pcie_init_irq_domain(struct hv_pcibus_device *hbus)
208962306a36Sopenharmony_ci{
209062306a36Sopenharmony_ci	hbus->msi_info.chip = &hv_msi_irq_chip;
209162306a36Sopenharmony_ci	hbus->msi_info.ops = &hv_msi_ops;
209262306a36Sopenharmony_ci	hbus->msi_info.flags = (MSI_FLAG_USE_DEF_DOM_OPS |
209362306a36Sopenharmony_ci		MSI_FLAG_USE_DEF_CHIP_OPS | MSI_FLAG_MULTI_PCI_MSI |
209462306a36Sopenharmony_ci		MSI_FLAG_PCI_MSIX);
209562306a36Sopenharmony_ci	hbus->msi_info.handler = FLOW_HANDLER;
209662306a36Sopenharmony_ci	hbus->msi_info.handler_name = FLOW_NAME;
209762306a36Sopenharmony_ci	hbus->msi_info.data = hbus;
209862306a36Sopenharmony_ci	hbus->irq_domain = pci_msi_create_irq_domain(hbus->fwnode,
209962306a36Sopenharmony_ci						     &hbus->msi_info,
210062306a36Sopenharmony_ci						     hv_pci_get_root_domain());
210162306a36Sopenharmony_ci	if (!hbus->irq_domain) {
210262306a36Sopenharmony_ci		dev_err(&hbus->hdev->device,
210362306a36Sopenharmony_ci			"Failed to build an MSI IRQ domain\n");
210462306a36Sopenharmony_ci		return -ENODEV;
210562306a36Sopenharmony_ci	}
210662306a36Sopenharmony_ci
210762306a36Sopenharmony_ci	dev_set_msi_domain(&hbus->bridge->dev, hbus->irq_domain);
210862306a36Sopenharmony_ci
210962306a36Sopenharmony_ci	return 0;
211062306a36Sopenharmony_ci}
211162306a36Sopenharmony_ci
211262306a36Sopenharmony_ci/**
211362306a36Sopenharmony_ci * get_bar_size() - Get the address space consumed by a BAR
211462306a36Sopenharmony_ci * @bar_val:	Value that a BAR returned after -1 was written
211562306a36Sopenharmony_ci *              to it.
211662306a36Sopenharmony_ci *
211762306a36Sopenharmony_ci * This function returns the size of the BAR, rounded up to 1
211862306a36Sopenharmony_ci * page.  It has to be rounded up because the hypervisor's page
211962306a36Sopenharmony_ci * table entry that maps the BAR into the VM can't specify an
212062306a36Sopenharmony_ci * offset within a page.  The invariant is that the hypervisor
212162306a36Sopenharmony_ci * must place any BARs of smaller than page length at the
212262306a36Sopenharmony_ci * beginning of a page.
212362306a36Sopenharmony_ci *
212462306a36Sopenharmony_ci * Return:	Size in bytes of the consumed MMIO space.
212562306a36Sopenharmony_ci */
212662306a36Sopenharmony_cistatic u64 get_bar_size(u64 bar_val)
212762306a36Sopenharmony_ci{
212862306a36Sopenharmony_ci	return round_up((1 + ~(bar_val & PCI_BASE_ADDRESS_MEM_MASK)),
212962306a36Sopenharmony_ci			PAGE_SIZE);
213062306a36Sopenharmony_ci}
213162306a36Sopenharmony_ci
213262306a36Sopenharmony_ci/**
213362306a36Sopenharmony_ci * survey_child_resources() - Total all MMIO requirements
213462306a36Sopenharmony_ci * @hbus:	Root PCI bus, as understood by this driver
213562306a36Sopenharmony_ci */
213662306a36Sopenharmony_cistatic void survey_child_resources(struct hv_pcibus_device *hbus)
213762306a36Sopenharmony_ci{
213862306a36Sopenharmony_ci	struct hv_pci_dev *hpdev;
213962306a36Sopenharmony_ci	resource_size_t bar_size = 0;
214062306a36Sopenharmony_ci	unsigned long flags;
214162306a36Sopenharmony_ci	struct completion *event;
214262306a36Sopenharmony_ci	u64 bar_val;
214362306a36Sopenharmony_ci	int i;
214462306a36Sopenharmony_ci
214562306a36Sopenharmony_ci	/* If nobody is waiting on the answer, don't compute it. */
214662306a36Sopenharmony_ci	event = xchg(&hbus->survey_event, NULL);
214762306a36Sopenharmony_ci	if (!event)
214862306a36Sopenharmony_ci		return;
214962306a36Sopenharmony_ci
215062306a36Sopenharmony_ci	/* If the answer has already been computed, go with it. */
215162306a36Sopenharmony_ci	if (hbus->low_mmio_space || hbus->high_mmio_space) {
215262306a36Sopenharmony_ci		complete(event);
215362306a36Sopenharmony_ci		return;
215462306a36Sopenharmony_ci	}
215562306a36Sopenharmony_ci
215662306a36Sopenharmony_ci	spin_lock_irqsave(&hbus->device_list_lock, flags);
215762306a36Sopenharmony_ci
215862306a36Sopenharmony_ci	/*
215962306a36Sopenharmony_ci	 * Due to an interesting quirk of the PCI spec, all memory regions
216062306a36Sopenharmony_ci	 * for a child device are a power of 2 in size and aligned in memory,
216162306a36Sopenharmony_ci	 * so it's sufficient to just add them up without tracking alignment.
216262306a36Sopenharmony_ci	 */
216362306a36Sopenharmony_ci	list_for_each_entry(hpdev, &hbus->children, list_entry) {
216462306a36Sopenharmony_ci		for (i = 0; i < PCI_STD_NUM_BARS; i++) {
216562306a36Sopenharmony_ci			if (hpdev->probed_bar[i] & PCI_BASE_ADDRESS_SPACE_IO)
216662306a36Sopenharmony_ci				dev_err(&hbus->hdev->device,
216762306a36Sopenharmony_ci					"There's an I/O BAR in this list!\n");
216862306a36Sopenharmony_ci
216962306a36Sopenharmony_ci			if (hpdev->probed_bar[i] != 0) {
217062306a36Sopenharmony_ci				/*
217162306a36Sopenharmony_ci				 * A probed BAR has all the upper bits set that
217262306a36Sopenharmony_ci				 * can be changed.
217362306a36Sopenharmony_ci				 */
217462306a36Sopenharmony_ci
217562306a36Sopenharmony_ci				bar_val = hpdev->probed_bar[i];
217662306a36Sopenharmony_ci				if (bar_val & PCI_BASE_ADDRESS_MEM_TYPE_64)
217762306a36Sopenharmony_ci					bar_val |=
217862306a36Sopenharmony_ci					((u64)hpdev->probed_bar[++i] << 32);
217962306a36Sopenharmony_ci				else
218062306a36Sopenharmony_ci					bar_val |= 0xffffffff00000000ULL;
218162306a36Sopenharmony_ci
218262306a36Sopenharmony_ci				bar_size = get_bar_size(bar_val);
218362306a36Sopenharmony_ci
218462306a36Sopenharmony_ci				if (bar_val & PCI_BASE_ADDRESS_MEM_TYPE_64)
218562306a36Sopenharmony_ci					hbus->high_mmio_space += bar_size;
218662306a36Sopenharmony_ci				else
218762306a36Sopenharmony_ci					hbus->low_mmio_space += bar_size;
218862306a36Sopenharmony_ci			}
218962306a36Sopenharmony_ci		}
219062306a36Sopenharmony_ci	}
219162306a36Sopenharmony_ci
219262306a36Sopenharmony_ci	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
219362306a36Sopenharmony_ci	complete(event);
219462306a36Sopenharmony_ci}
219562306a36Sopenharmony_ci
219662306a36Sopenharmony_ci/**
219762306a36Sopenharmony_ci * prepopulate_bars() - Fill in BARs with defaults
219862306a36Sopenharmony_ci * @hbus:	Root PCI bus, as understood by this driver
219962306a36Sopenharmony_ci *
220062306a36Sopenharmony_ci * The core PCI driver code seems much, much happier if the BARs
220162306a36Sopenharmony_ci * for a device have values upon first scan. So fill them in.
220262306a36Sopenharmony_ci * The algorithm below works down from large sizes to small,
220362306a36Sopenharmony_ci * attempting to pack the assignments optimally. The assumption,
220462306a36Sopenharmony_ci * enforced in other parts of the code, is that the beginning of
220562306a36Sopenharmony_ci * the memory-mapped I/O space will be aligned on the largest
220662306a36Sopenharmony_ci * BAR size.
220762306a36Sopenharmony_ci */
220862306a36Sopenharmony_cistatic void prepopulate_bars(struct hv_pcibus_device *hbus)
220962306a36Sopenharmony_ci{
221062306a36Sopenharmony_ci	resource_size_t high_size = 0;
221162306a36Sopenharmony_ci	resource_size_t low_size = 0;
221262306a36Sopenharmony_ci	resource_size_t high_base = 0;
221362306a36Sopenharmony_ci	resource_size_t low_base = 0;
221462306a36Sopenharmony_ci	resource_size_t bar_size;
221562306a36Sopenharmony_ci	struct hv_pci_dev *hpdev;
221662306a36Sopenharmony_ci	unsigned long flags;
221762306a36Sopenharmony_ci	u64 bar_val;
221862306a36Sopenharmony_ci	u32 command;
221962306a36Sopenharmony_ci	bool high;
222062306a36Sopenharmony_ci	int i;
222162306a36Sopenharmony_ci
222262306a36Sopenharmony_ci	if (hbus->low_mmio_space) {
222362306a36Sopenharmony_ci		low_size = 1ULL << (63 - __builtin_clzll(hbus->low_mmio_space));
222462306a36Sopenharmony_ci		low_base = hbus->low_mmio_res->start;
222562306a36Sopenharmony_ci	}
222662306a36Sopenharmony_ci
222762306a36Sopenharmony_ci	if (hbus->high_mmio_space) {
222862306a36Sopenharmony_ci		high_size = 1ULL <<
222962306a36Sopenharmony_ci			(63 - __builtin_clzll(hbus->high_mmio_space));
223062306a36Sopenharmony_ci		high_base = hbus->high_mmio_res->start;
223162306a36Sopenharmony_ci	}
223262306a36Sopenharmony_ci
223362306a36Sopenharmony_ci	spin_lock_irqsave(&hbus->device_list_lock, flags);
223462306a36Sopenharmony_ci
223562306a36Sopenharmony_ci	/*
223662306a36Sopenharmony_ci	 * Clear the memory enable bit, in case it's already set. This occurs
223762306a36Sopenharmony_ci	 * in the suspend path of hibernation, where the device is suspended,
223862306a36Sopenharmony_ci	 * resumed and suspended again: see hibernation_snapshot() and
223962306a36Sopenharmony_ci	 * hibernation_platform_enter().
224062306a36Sopenharmony_ci	 *
224162306a36Sopenharmony_ci	 * If the memory enable bit is already set, Hyper-V silently ignores
224262306a36Sopenharmony_ci	 * the below BAR updates, and the related PCI device driver can not
224362306a36Sopenharmony_ci	 * work, because reading from the device register(s) always returns
224462306a36Sopenharmony_ci	 * 0xFFFFFFFF (PCI_ERROR_RESPONSE).
224562306a36Sopenharmony_ci	 */
224662306a36Sopenharmony_ci	list_for_each_entry(hpdev, &hbus->children, list_entry) {
224762306a36Sopenharmony_ci		_hv_pcifront_read_config(hpdev, PCI_COMMAND, 2, &command);
224862306a36Sopenharmony_ci		command &= ~PCI_COMMAND_MEMORY;
224962306a36Sopenharmony_ci		_hv_pcifront_write_config(hpdev, PCI_COMMAND, 2, command);
225062306a36Sopenharmony_ci	}
225162306a36Sopenharmony_ci
225262306a36Sopenharmony_ci	/* Pick addresses for the BARs. */
225362306a36Sopenharmony_ci	do {
225462306a36Sopenharmony_ci		list_for_each_entry(hpdev, &hbus->children, list_entry) {
225562306a36Sopenharmony_ci			for (i = 0; i < PCI_STD_NUM_BARS; i++) {
225662306a36Sopenharmony_ci				bar_val = hpdev->probed_bar[i];
225762306a36Sopenharmony_ci				if (bar_val == 0)
225862306a36Sopenharmony_ci					continue;
225962306a36Sopenharmony_ci				high = bar_val & PCI_BASE_ADDRESS_MEM_TYPE_64;
226062306a36Sopenharmony_ci				if (high) {
226162306a36Sopenharmony_ci					bar_val |=
226262306a36Sopenharmony_ci						((u64)hpdev->probed_bar[i + 1]
226362306a36Sopenharmony_ci						 << 32);
226462306a36Sopenharmony_ci				} else {
226562306a36Sopenharmony_ci					bar_val |= 0xffffffffULL << 32;
226662306a36Sopenharmony_ci				}
226762306a36Sopenharmony_ci				bar_size = get_bar_size(bar_val);
226862306a36Sopenharmony_ci				if (high) {
226962306a36Sopenharmony_ci					if (high_size != bar_size) {
227062306a36Sopenharmony_ci						i++;
227162306a36Sopenharmony_ci						continue;
227262306a36Sopenharmony_ci					}
227362306a36Sopenharmony_ci					_hv_pcifront_write_config(hpdev,
227462306a36Sopenharmony_ci						PCI_BASE_ADDRESS_0 + (4 * i),
227562306a36Sopenharmony_ci						4,
227662306a36Sopenharmony_ci						(u32)(high_base & 0xffffff00));
227762306a36Sopenharmony_ci					i++;
227862306a36Sopenharmony_ci					_hv_pcifront_write_config(hpdev,
227962306a36Sopenharmony_ci						PCI_BASE_ADDRESS_0 + (4 * i),
228062306a36Sopenharmony_ci						4, (u32)(high_base >> 32));
228162306a36Sopenharmony_ci					high_base += bar_size;
228262306a36Sopenharmony_ci				} else {
228362306a36Sopenharmony_ci					if (low_size != bar_size)
228462306a36Sopenharmony_ci						continue;
228562306a36Sopenharmony_ci					_hv_pcifront_write_config(hpdev,
228662306a36Sopenharmony_ci						PCI_BASE_ADDRESS_0 + (4 * i),
228762306a36Sopenharmony_ci						4,
228862306a36Sopenharmony_ci						(u32)(low_base & 0xffffff00));
228962306a36Sopenharmony_ci					low_base += bar_size;
229062306a36Sopenharmony_ci				}
229162306a36Sopenharmony_ci			}
229262306a36Sopenharmony_ci			if (high_size <= 1 && low_size <= 1) {
229362306a36Sopenharmony_ci				/*
229462306a36Sopenharmony_ci				 * No need to set the PCI_COMMAND_MEMORY bit as
229562306a36Sopenharmony_ci				 * the core PCI driver doesn't require the bit
229662306a36Sopenharmony_ci				 * to be pre-set. Actually here we intentionally
229762306a36Sopenharmony_ci				 * keep the bit off so that the PCI BAR probing
229862306a36Sopenharmony_ci				 * in the core PCI driver doesn't cause Hyper-V
229962306a36Sopenharmony_ci				 * to unnecessarily unmap/map the virtual BARs
230062306a36Sopenharmony_ci				 * from/to the physical BARs multiple times.
230162306a36Sopenharmony_ci				 * This reduces the VM boot time significantly
230262306a36Sopenharmony_ci				 * if the BAR sizes are huge.
230362306a36Sopenharmony_ci				 */
230462306a36Sopenharmony_ci				break;
230562306a36Sopenharmony_ci			}
230662306a36Sopenharmony_ci		}
230762306a36Sopenharmony_ci
230862306a36Sopenharmony_ci		high_size >>= 1;
230962306a36Sopenharmony_ci		low_size >>= 1;
231062306a36Sopenharmony_ci	}  while (high_size || low_size);
231162306a36Sopenharmony_ci
231262306a36Sopenharmony_ci	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
231362306a36Sopenharmony_ci}
231462306a36Sopenharmony_ci
231562306a36Sopenharmony_ci/*
231662306a36Sopenharmony_ci * Assign entries in sysfs pci slot directory.
231762306a36Sopenharmony_ci *
231862306a36Sopenharmony_ci * Note that this function does not need to lock the children list
231962306a36Sopenharmony_ci * because it is called from pci_devices_present_work which
232062306a36Sopenharmony_ci * is serialized with hv_eject_device_work because they are on the
232162306a36Sopenharmony_ci * same ordered workqueue. Therefore hbus->children list will not change
232262306a36Sopenharmony_ci * even when pci_create_slot sleeps.
232362306a36Sopenharmony_ci */
232462306a36Sopenharmony_cistatic void hv_pci_assign_slots(struct hv_pcibus_device *hbus)
232562306a36Sopenharmony_ci{
232662306a36Sopenharmony_ci	struct hv_pci_dev *hpdev;
232762306a36Sopenharmony_ci	char name[SLOT_NAME_SIZE];
232862306a36Sopenharmony_ci	int slot_nr;
232962306a36Sopenharmony_ci
233062306a36Sopenharmony_ci	list_for_each_entry(hpdev, &hbus->children, list_entry) {
233162306a36Sopenharmony_ci		if (hpdev->pci_slot)
233262306a36Sopenharmony_ci			continue;
233362306a36Sopenharmony_ci
233462306a36Sopenharmony_ci		slot_nr = PCI_SLOT(wslot_to_devfn(hpdev->desc.win_slot.slot));
233562306a36Sopenharmony_ci		snprintf(name, SLOT_NAME_SIZE, "%u", hpdev->desc.ser);
233662306a36Sopenharmony_ci		hpdev->pci_slot = pci_create_slot(hbus->bridge->bus, slot_nr,
233762306a36Sopenharmony_ci					  name, NULL);
233862306a36Sopenharmony_ci		if (IS_ERR(hpdev->pci_slot)) {
233962306a36Sopenharmony_ci			pr_warn("pci_create slot %s failed\n", name);
234062306a36Sopenharmony_ci			hpdev->pci_slot = NULL;
234162306a36Sopenharmony_ci		}
234262306a36Sopenharmony_ci	}
234362306a36Sopenharmony_ci}
234462306a36Sopenharmony_ci
234562306a36Sopenharmony_ci/*
234662306a36Sopenharmony_ci * Remove entries in sysfs pci slot directory.
234762306a36Sopenharmony_ci */
234862306a36Sopenharmony_cistatic void hv_pci_remove_slots(struct hv_pcibus_device *hbus)
234962306a36Sopenharmony_ci{
235062306a36Sopenharmony_ci	struct hv_pci_dev *hpdev;
235162306a36Sopenharmony_ci
235262306a36Sopenharmony_ci	list_for_each_entry(hpdev, &hbus->children, list_entry) {
235362306a36Sopenharmony_ci		if (!hpdev->pci_slot)
235462306a36Sopenharmony_ci			continue;
235562306a36Sopenharmony_ci		pci_destroy_slot(hpdev->pci_slot);
235662306a36Sopenharmony_ci		hpdev->pci_slot = NULL;
235762306a36Sopenharmony_ci	}
235862306a36Sopenharmony_ci}
235962306a36Sopenharmony_ci
236062306a36Sopenharmony_ci/*
236162306a36Sopenharmony_ci * Set NUMA node for the devices on the bus
236262306a36Sopenharmony_ci */
236362306a36Sopenharmony_cistatic void hv_pci_assign_numa_node(struct hv_pcibus_device *hbus)
236462306a36Sopenharmony_ci{
236562306a36Sopenharmony_ci	struct pci_dev *dev;
236662306a36Sopenharmony_ci	struct pci_bus *bus = hbus->bridge->bus;
236762306a36Sopenharmony_ci	struct hv_pci_dev *hv_dev;
236862306a36Sopenharmony_ci
236962306a36Sopenharmony_ci	list_for_each_entry(dev, &bus->devices, bus_list) {
237062306a36Sopenharmony_ci		hv_dev = get_pcichild_wslot(hbus, devfn_to_wslot(dev->devfn));
237162306a36Sopenharmony_ci		if (!hv_dev)
237262306a36Sopenharmony_ci			continue;
237362306a36Sopenharmony_ci
237462306a36Sopenharmony_ci		if (hv_dev->desc.flags & HV_PCI_DEVICE_FLAG_NUMA_AFFINITY &&
237562306a36Sopenharmony_ci		    hv_dev->desc.virtual_numa_node < num_possible_nodes())
237662306a36Sopenharmony_ci			/*
237762306a36Sopenharmony_ci			 * The kernel may boot with some NUMA nodes offline
237862306a36Sopenharmony_ci			 * (e.g. in a KDUMP kernel) or with NUMA disabled via
237962306a36Sopenharmony_ci			 * "numa=off". In those cases, adjust the host provided
238062306a36Sopenharmony_ci			 * NUMA node to a valid NUMA node used by the kernel.
238162306a36Sopenharmony_ci			 */
238262306a36Sopenharmony_ci			set_dev_node(&dev->dev,
238362306a36Sopenharmony_ci				     numa_map_to_online_node(
238462306a36Sopenharmony_ci					     hv_dev->desc.virtual_numa_node));
238562306a36Sopenharmony_ci
238662306a36Sopenharmony_ci		put_pcichild(hv_dev);
238762306a36Sopenharmony_ci	}
238862306a36Sopenharmony_ci}
238962306a36Sopenharmony_ci
239062306a36Sopenharmony_ci/**
239162306a36Sopenharmony_ci * create_root_hv_pci_bus() - Expose a new root PCI bus
239262306a36Sopenharmony_ci * @hbus:	Root PCI bus, as understood by this driver
239362306a36Sopenharmony_ci *
239462306a36Sopenharmony_ci * Return: 0 on success, -errno on failure
239562306a36Sopenharmony_ci */
239662306a36Sopenharmony_cistatic int create_root_hv_pci_bus(struct hv_pcibus_device *hbus)
239762306a36Sopenharmony_ci{
239862306a36Sopenharmony_ci	int error;
239962306a36Sopenharmony_ci	struct pci_host_bridge *bridge = hbus->bridge;
240062306a36Sopenharmony_ci
240162306a36Sopenharmony_ci	bridge->dev.parent = &hbus->hdev->device;
240262306a36Sopenharmony_ci	bridge->sysdata = &hbus->sysdata;
240362306a36Sopenharmony_ci	bridge->ops = &hv_pcifront_ops;
240462306a36Sopenharmony_ci
240562306a36Sopenharmony_ci	error = pci_scan_root_bus_bridge(bridge);
240662306a36Sopenharmony_ci	if (error)
240762306a36Sopenharmony_ci		return error;
240862306a36Sopenharmony_ci
240962306a36Sopenharmony_ci	pci_lock_rescan_remove();
241062306a36Sopenharmony_ci	hv_pci_assign_numa_node(hbus);
241162306a36Sopenharmony_ci	pci_bus_assign_resources(bridge->bus);
241262306a36Sopenharmony_ci	hv_pci_assign_slots(hbus);
241362306a36Sopenharmony_ci	pci_bus_add_devices(bridge->bus);
241462306a36Sopenharmony_ci	pci_unlock_rescan_remove();
241562306a36Sopenharmony_ci	hbus->state = hv_pcibus_installed;
241662306a36Sopenharmony_ci	return 0;
241762306a36Sopenharmony_ci}
241862306a36Sopenharmony_ci
/**
 * struct q_res_req_compl - Completion context for a Query Resource
 *			    Requirements request
 * @host_event:	Completed by q_resource_requirements() once the host's
 *		response has been handled.
 * @hpdev:	Child device whose probed_bar[] array receives the BAR
 *		values carried in a successful response.
 */
struct q_res_req_compl {
	struct completion host_event;
	struct hv_pci_dev *hpdev;
};
242362306a36Sopenharmony_ci
242462306a36Sopenharmony_ci/**
242562306a36Sopenharmony_ci * q_resource_requirements() - Query Resource Requirements
242662306a36Sopenharmony_ci * @context:		The completion context.
242762306a36Sopenharmony_ci * @resp:		The response that came from the host.
242862306a36Sopenharmony_ci * @resp_packet_size:	The size in bytes of resp.
242962306a36Sopenharmony_ci *
243062306a36Sopenharmony_ci * This function is invoked on completion of a Query Resource
243162306a36Sopenharmony_ci * Requirements packet.
243262306a36Sopenharmony_ci */
243362306a36Sopenharmony_cistatic void q_resource_requirements(void *context, struct pci_response *resp,
243462306a36Sopenharmony_ci				    int resp_packet_size)
243562306a36Sopenharmony_ci{
243662306a36Sopenharmony_ci	struct q_res_req_compl *completion = context;
243762306a36Sopenharmony_ci	struct pci_q_res_req_response *q_res_req =
243862306a36Sopenharmony_ci		(struct pci_q_res_req_response *)resp;
243962306a36Sopenharmony_ci	s32 status;
244062306a36Sopenharmony_ci	int i;
244162306a36Sopenharmony_ci
244262306a36Sopenharmony_ci	status = (resp_packet_size < sizeof(*q_res_req)) ? -1 : resp->status;
244362306a36Sopenharmony_ci	if (status < 0) {
244462306a36Sopenharmony_ci		dev_err(&completion->hpdev->hbus->hdev->device,
244562306a36Sopenharmony_ci			"query resource requirements failed: %x\n",
244662306a36Sopenharmony_ci			status);
244762306a36Sopenharmony_ci	} else {
244862306a36Sopenharmony_ci		for (i = 0; i < PCI_STD_NUM_BARS; i++) {
244962306a36Sopenharmony_ci			completion->hpdev->probed_bar[i] =
245062306a36Sopenharmony_ci				q_res_req->probed_bar[i];
245162306a36Sopenharmony_ci		}
245262306a36Sopenharmony_ci	}
245362306a36Sopenharmony_ci
245462306a36Sopenharmony_ci	complete(&completion->host_event);
245562306a36Sopenharmony_ci}
245662306a36Sopenharmony_ci
245762306a36Sopenharmony_ci/**
245862306a36Sopenharmony_ci * new_pcichild_device() - Create a new child device
245962306a36Sopenharmony_ci * @hbus:	The internal struct tracking this root PCI bus.
246062306a36Sopenharmony_ci * @desc:	The information supplied so far from the host
246162306a36Sopenharmony_ci *              about the device.
246262306a36Sopenharmony_ci *
246362306a36Sopenharmony_ci * This function creates the tracking structure for a new child
246462306a36Sopenharmony_ci * device and kicks off the process of figuring out what it is.
246562306a36Sopenharmony_ci *
246662306a36Sopenharmony_ci * Return: Pointer to the new tracking struct
246762306a36Sopenharmony_ci */
246862306a36Sopenharmony_cistatic struct hv_pci_dev *new_pcichild_device(struct hv_pcibus_device *hbus,
246962306a36Sopenharmony_ci		struct hv_pcidev_description *desc)
247062306a36Sopenharmony_ci{
247162306a36Sopenharmony_ci	struct hv_pci_dev *hpdev;
247262306a36Sopenharmony_ci	struct pci_child_message *res_req;
247362306a36Sopenharmony_ci	struct q_res_req_compl comp_pkt;
247462306a36Sopenharmony_ci	struct {
247562306a36Sopenharmony_ci		struct pci_packet init_packet;
247662306a36Sopenharmony_ci		u8 buffer[sizeof(struct pci_child_message)];
247762306a36Sopenharmony_ci	} pkt;
247862306a36Sopenharmony_ci	unsigned long flags;
247962306a36Sopenharmony_ci	int ret;
248062306a36Sopenharmony_ci
248162306a36Sopenharmony_ci	hpdev = kzalloc(sizeof(*hpdev), GFP_KERNEL);
248262306a36Sopenharmony_ci	if (!hpdev)
248362306a36Sopenharmony_ci		return NULL;
248462306a36Sopenharmony_ci
248562306a36Sopenharmony_ci	hpdev->hbus = hbus;
248662306a36Sopenharmony_ci
248762306a36Sopenharmony_ci	memset(&pkt, 0, sizeof(pkt));
248862306a36Sopenharmony_ci	init_completion(&comp_pkt.host_event);
248962306a36Sopenharmony_ci	comp_pkt.hpdev = hpdev;
249062306a36Sopenharmony_ci	pkt.init_packet.compl_ctxt = &comp_pkt;
249162306a36Sopenharmony_ci	pkt.init_packet.completion_func = q_resource_requirements;
249262306a36Sopenharmony_ci	res_req = (struct pci_child_message *)&pkt.init_packet.message;
249362306a36Sopenharmony_ci	res_req->message_type.type = PCI_QUERY_RESOURCE_REQUIREMENTS;
249462306a36Sopenharmony_ci	res_req->wslot.slot = desc->win_slot.slot;
249562306a36Sopenharmony_ci
249662306a36Sopenharmony_ci	ret = vmbus_sendpacket(hbus->hdev->channel, res_req,
249762306a36Sopenharmony_ci			       sizeof(struct pci_child_message),
249862306a36Sopenharmony_ci			       (unsigned long)&pkt.init_packet,
249962306a36Sopenharmony_ci			       VM_PKT_DATA_INBAND,
250062306a36Sopenharmony_ci			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
250162306a36Sopenharmony_ci	if (ret)
250262306a36Sopenharmony_ci		goto error;
250362306a36Sopenharmony_ci
250462306a36Sopenharmony_ci	if (wait_for_response(hbus->hdev, &comp_pkt.host_event))
250562306a36Sopenharmony_ci		goto error;
250662306a36Sopenharmony_ci
250762306a36Sopenharmony_ci	hpdev->desc = *desc;
250862306a36Sopenharmony_ci	refcount_set(&hpdev->refs, 1);
250962306a36Sopenharmony_ci	get_pcichild(hpdev);
251062306a36Sopenharmony_ci	spin_lock_irqsave(&hbus->device_list_lock, flags);
251162306a36Sopenharmony_ci
251262306a36Sopenharmony_ci	list_add_tail(&hpdev->list_entry, &hbus->children);
251362306a36Sopenharmony_ci	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
251462306a36Sopenharmony_ci	return hpdev;
251562306a36Sopenharmony_ci
251662306a36Sopenharmony_cierror:
251762306a36Sopenharmony_ci	kfree(hpdev);
251862306a36Sopenharmony_ci	return NULL;
251962306a36Sopenharmony_ci}
252062306a36Sopenharmony_ci
252162306a36Sopenharmony_ci/**
252262306a36Sopenharmony_ci * get_pcichild_wslot() - Find device from slot
252362306a36Sopenharmony_ci * @hbus:	Root PCI bus, as understood by this driver
252462306a36Sopenharmony_ci * @wslot:	Location on the bus
252562306a36Sopenharmony_ci *
252662306a36Sopenharmony_ci * This function looks up a PCI device and returns the internal
252762306a36Sopenharmony_ci * representation of it.  It acquires a reference on it, so that
252862306a36Sopenharmony_ci * the device won't be deleted while somebody is using it.  The
252962306a36Sopenharmony_ci * caller is responsible for calling put_pcichild() to release
253062306a36Sopenharmony_ci * this reference.
253162306a36Sopenharmony_ci *
253262306a36Sopenharmony_ci * Return:	Internal representation of a PCI device
253362306a36Sopenharmony_ci */
253462306a36Sopenharmony_cistatic struct hv_pci_dev *get_pcichild_wslot(struct hv_pcibus_device *hbus,
253562306a36Sopenharmony_ci					     u32 wslot)
253662306a36Sopenharmony_ci{
253762306a36Sopenharmony_ci	unsigned long flags;
253862306a36Sopenharmony_ci	struct hv_pci_dev *iter, *hpdev = NULL;
253962306a36Sopenharmony_ci
254062306a36Sopenharmony_ci	spin_lock_irqsave(&hbus->device_list_lock, flags);
254162306a36Sopenharmony_ci	list_for_each_entry(iter, &hbus->children, list_entry) {
254262306a36Sopenharmony_ci		if (iter->desc.win_slot.slot == wslot) {
254362306a36Sopenharmony_ci			hpdev = iter;
254462306a36Sopenharmony_ci			get_pcichild(hpdev);
254562306a36Sopenharmony_ci			break;
254662306a36Sopenharmony_ci		}
254762306a36Sopenharmony_ci	}
254862306a36Sopenharmony_ci	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
254962306a36Sopenharmony_ci
255062306a36Sopenharmony_ci	return hpdev;
255162306a36Sopenharmony_ci}
255262306a36Sopenharmony_ci
/**
 * pci_devices_present_work() - Handle new list of child devices
 * @work:	Work struct embedded in struct hv_dr_work
 *
 * "Bus Relations" is the Windows term for "children of this
 * bus."  The terminology is preserved here for people trying to
 * debug the interaction between Hyper-V and Linux.  This
 * function is called when the parent partition reports a list
 * of functions that should be observed under this PCI Express
 * port (bus).
 *
 * This function updates the list, and must tolerate being
 * called multiple times with the same information.  The typical
 * number of child devices is one, with very atypical cases
 * involving three or four, so the algorithms used here can be
 * simple and inefficient.
 *
 * It must also treat the omission of a previously observed device as
 * notification that the device no longer exists.
 *
 * Only the most recently queued relations list on hbus->dr_list is
 * processed; any older lists still queued are discarded.
 *
 * Note that this function is serialized with hv_eject_device_work(),
 * because both are pushed to the ordered workqueue hbus->wq.
 */
static void pci_devices_present_work(struct work_struct *work)
{
	u32 child_no;
	bool found;
	struct hv_pcidev_description *new_desc;
	struct hv_pci_dev *hpdev;
	struct hv_pcibus_device *hbus;
	struct list_head removed;
	struct hv_dr_work *dr_wrk;
	struct hv_dr_state *dr = NULL;
	unsigned long flags;

	/*
	 * The work item is only needed to find the bus, so it is freed as
	 * soon as the bus pointer has been read.
	 */
	dr_wrk = container_of(work, struct hv_dr_work, wrk);
	hbus = dr_wrk->bus;
	kfree(dr_wrk);

	INIT_LIST_HEAD(&removed);

	/*
	 * Pull this off the queue and process it if it was the last one.
	 * When the loop ends, dr is the last entry that was on dr_list
	 * (unlinked but not freed), or NULL if the list was empty.
	 */
	spin_lock_irqsave(&hbus->device_list_lock, flags);
	while (!list_empty(&hbus->dr_list)) {
		dr = list_first_entry(&hbus->dr_list, struct hv_dr_state,
				      list_entry);
		list_del(&dr->list_entry);

		/* Throw this away if the list still has stuff in it. */
		if (!list_empty(&hbus->dr_list)) {
			kfree(dr);
			continue;
		}
	}
	spin_unlock_irqrestore(&hbus->device_list_lock, flags);

	/* Nothing was queued, so there is nothing to process. */
	if (!dr)
		return;

	mutex_lock(&hbus->state_lock);

	/* First, mark all existing children as reported missing. */
	spin_lock_irqsave(&hbus->device_list_lock, flags);
	list_for_each_entry(hpdev, &hbus->children, list_entry) {
		hpdev->reported_missing = true;
	}
	spin_unlock_irqrestore(&hbus->device_list_lock, flags);

	/* Next, add back any reported devices. */
	for (child_no = 0; child_no < dr->device_count; child_no++) {
		found = false;
		new_desc = &dr->func[child_no];

		/*
		 * A known child matches only if slot, vendor ID, device ID
		 * and serial number are all identical.
		 */
		spin_lock_irqsave(&hbus->device_list_lock, flags);
		list_for_each_entry(hpdev, &hbus->children, list_entry) {
			if ((hpdev->desc.win_slot.slot == new_desc->win_slot.slot) &&
			    (hpdev->desc.v_id == new_desc->v_id) &&
			    (hpdev->desc.d_id == new_desc->d_id) &&
			    (hpdev->desc.ser == new_desc->ser)) {
				hpdev->reported_missing = false;
				found = true;
			}
		}
		spin_unlock_irqrestore(&hbus->device_list_lock, flags);

		/*
		 * Not seen before: create a tracking structure for it.  This
		 * is done outside the spinlock; new_pcichild_device() takes
		 * device_list_lock itself when it adds the child to the list.
		 */
		if (!found) {
			hpdev = new_pcichild_device(hbus, new_desc);
			if (!hpdev)
				dev_err(&hbus->hdev->device,
					"couldn't record a child device.\n");
		}
	}

	/*
	 * Move missing children to a list on the stack.
	 *
	 * list_move_tail() modifies the list being walked, so the walk is
	 * abandoned after each move and restarted from the head until a
	 * full pass finds no missing child.  One reference is dropped here
	 * and another in the deletion loop below.
	 */
	spin_lock_irqsave(&hbus->device_list_lock, flags);
	do {
		found = false;
		list_for_each_entry(hpdev, &hbus->children, list_entry) {
			if (hpdev->reported_missing) {
				found = true;
				put_pcichild(hpdev);
				list_move_tail(&hpdev->list_entry, &removed);
				break;
			}
		}
	} while (found);
	spin_unlock_irqrestore(&hbus->device_list_lock, flags);

	/* Delete everything that should no longer exist. */
	while (!list_empty(&removed)) {
		hpdev = list_first_entry(&removed, struct hv_pci_dev,
					 list_entry);
		list_del(&hpdev->list_entry);

		if (hpdev->pci_slot)
			pci_destroy_slot(hpdev->pci_slot);

		put_pcichild(hpdev);
	}

	switch (hbus->state) {
	case hv_pcibus_installed:
		/*
		 * Tell the core to rescan bus
		 * because there may have been changes.
		 */
		pci_lock_rescan_remove();
		pci_scan_child_bus(hbus->bridge->bus);
		hv_pci_assign_numa_node(hbus);
		hv_pci_assign_slots(hbus);
		pci_unlock_rescan_remove();
		break;

	case hv_pcibus_init:
	case hv_pcibus_probed:
		/* The bus is not installed yet; only survey the children. */
		survey_child_resources(hbus);
		break;

	default:
		break;
	}

	mutex_unlock(&hbus->state_lock);

	/* The relations list has been fully processed. */
	kfree(dr);
}
269962306a36Sopenharmony_ci
270062306a36Sopenharmony_ci/**
270162306a36Sopenharmony_ci * hv_pci_start_relations_work() - Queue work to start device discovery
270262306a36Sopenharmony_ci * @hbus:	Root PCI bus, as understood by this driver
270362306a36Sopenharmony_ci * @dr:		The list of children returned from host
270462306a36Sopenharmony_ci *
270562306a36Sopenharmony_ci * Return:  0 on success, -errno on failure
270662306a36Sopenharmony_ci */
270762306a36Sopenharmony_cistatic int hv_pci_start_relations_work(struct hv_pcibus_device *hbus,
270862306a36Sopenharmony_ci				       struct hv_dr_state *dr)
270962306a36Sopenharmony_ci{
271062306a36Sopenharmony_ci	struct hv_dr_work *dr_wrk;
271162306a36Sopenharmony_ci	unsigned long flags;
271262306a36Sopenharmony_ci	bool pending_dr;
271362306a36Sopenharmony_ci
271462306a36Sopenharmony_ci	if (hbus->state == hv_pcibus_removing) {
271562306a36Sopenharmony_ci		dev_info(&hbus->hdev->device,
271662306a36Sopenharmony_ci			 "PCI VMBus BUS_RELATIONS: ignored\n");
271762306a36Sopenharmony_ci		return -ENOENT;
271862306a36Sopenharmony_ci	}
271962306a36Sopenharmony_ci
272062306a36Sopenharmony_ci	dr_wrk = kzalloc(sizeof(*dr_wrk), GFP_NOWAIT);
272162306a36Sopenharmony_ci	if (!dr_wrk)
272262306a36Sopenharmony_ci		return -ENOMEM;
272362306a36Sopenharmony_ci
272462306a36Sopenharmony_ci	INIT_WORK(&dr_wrk->wrk, pci_devices_present_work);
272562306a36Sopenharmony_ci	dr_wrk->bus = hbus;
272662306a36Sopenharmony_ci
272762306a36Sopenharmony_ci	spin_lock_irqsave(&hbus->device_list_lock, flags);
272862306a36Sopenharmony_ci	/*
272962306a36Sopenharmony_ci	 * If pending_dr is true, we have already queued a work,
273062306a36Sopenharmony_ci	 * which will see the new dr. Otherwise, we need to
273162306a36Sopenharmony_ci	 * queue a new work.
273262306a36Sopenharmony_ci	 */
273362306a36Sopenharmony_ci	pending_dr = !list_empty(&hbus->dr_list);
273462306a36Sopenharmony_ci	list_add_tail(&dr->list_entry, &hbus->dr_list);
273562306a36Sopenharmony_ci	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
273662306a36Sopenharmony_ci
273762306a36Sopenharmony_ci	if (pending_dr)
273862306a36Sopenharmony_ci		kfree(dr_wrk);
273962306a36Sopenharmony_ci	else
274062306a36Sopenharmony_ci		queue_work(hbus->wq, &dr_wrk->wrk);
274162306a36Sopenharmony_ci
274262306a36Sopenharmony_ci	return 0;
274362306a36Sopenharmony_ci}
274462306a36Sopenharmony_ci
274562306a36Sopenharmony_ci/**
274662306a36Sopenharmony_ci * hv_pci_devices_present() - Handle list of new children
274762306a36Sopenharmony_ci * @hbus:      Root PCI bus, as understood by this driver
274862306a36Sopenharmony_ci * @relations: Packet from host listing children
274962306a36Sopenharmony_ci *
275062306a36Sopenharmony_ci * Process a new list of devices on the bus. The list of devices is
275162306a36Sopenharmony_ci * discovered by VSP and sent to us via VSP message PCI_BUS_RELATIONS,
275262306a36Sopenharmony_ci * whenever a new list of devices for this bus appears.
275362306a36Sopenharmony_ci */
275462306a36Sopenharmony_cistatic void hv_pci_devices_present(struct hv_pcibus_device *hbus,
275562306a36Sopenharmony_ci				   struct pci_bus_relations *relations)
275662306a36Sopenharmony_ci{
275762306a36Sopenharmony_ci	struct hv_dr_state *dr;
275862306a36Sopenharmony_ci	int i;
275962306a36Sopenharmony_ci
276062306a36Sopenharmony_ci	dr = kzalloc(struct_size(dr, func, relations->device_count),
276162306a36Sopenharmony_ci		     GFP_NOWAIT);
276262306a36Sopenharmony_ci	if (!dr)
276362306a36Sopenharmony_ci		return;
276462306a36Sopenharmony_ci
276562306a36Sopenharmony_ci	dr->device_count = relations->device_count;
276662306a36Sopenharmony_ci	for (i = 0; i < dr->device_count; i++) {
276762306a36Sopenharmony_ci		dr->func[i].v_id = relations->func[i].v_id;
276862306a36Sopenharmony_ci		dr->func[i].d_id = relations->func[i].d_id;
276962306a36Sopenharmony_ci		dr->func[i].rev = relations->func[i].rev;
277062306a36Sopenharmony_ci		dr->func[i].prog_intf = relations->func[i].prog_intf;
277162306a36Sopenharmony_ci		dr->func[i].subclass = relations->func[i].subclass;
277262306a36Sopenharmony_ci		dr->func[i].base_class = relations->func[i].base_class;
277362306a36Sopenharmony_ci		dr->func[i].subsystem_id = relations->func[i].subsystem_id;
277462306a36Sopenharmony_ci		dr->func[i].win_slot = relations->func[i].win_slot;
277562306a36Sopenharmony_ci		dr->func[i].ser = relations->func[i].ser;
277662306a36Sopenharmony_ci	}
277762306a36Sopenharmony_ci
277862306a36Sopenharmony_ci	if (hv_pci_start_relations_work(hbus, dr))
277962306a36Sopenharmony_ci		kfree(dr);
278062306a36Sopenharmony_ci}
278162306a36Sopenharmony_ci
278262306a36Sopenharmony_ci/**
278362306a36Sopenharmony_ci * hv_pci_devices_present2() - Handle list of new children
278462306a36Sopenharmony_ci * @hbus:	Root PCI bus, as understood by this driver
278562306a36Sopenharmony_ci * @relations:	Packet from host listing children
278662306a36Sopenharmony_ci *
278762306a36Sopenharmony_ci * This function is the v2 version of hv_pci_devices_present()
278862306a36Sopenharmony_ci */
278962306a36Sopenharmony_cistatic void hv_pci_devices_present2(struct hv_pcibus_device *hbus,
279062306a36Sopenharmony_ci				    struct pci_bus_relations2 *relations)
279162306a36Sopenharmony_ci{
279262306a36Sopenharmony_ci	struct hv_dr_state *dr;
279362306a36Sopenharmony_ci	int i;
279462306a36Sopenharmony_ci
279562306a36Sopenharmony_ci	dr = kzalloc(struct_size(dr, func, relations->device_count),
279662306a36Sopenharmony_ci		     GFP_NOWAIT);
279762306a36Sopenharmony_ci	if (!dr)
279862306a36Sopenharmony_ci		return;
279962306a36Sopenharmony_ci
280062306a36Sopenharmony_ci	dr->device_count = relations->device_count;
280162306a36Sopenharmony_ci	for (i = 0; i < dr->device_count; i++) {
280262306a36Sopenharmony_ci		dr->func[i].v_id = relations->func[i].v_id;
280362306a36Sopenharmony_ci		dr->func[i].d_id = relations->func[i].d_id;
280462306a36Sopenharmony_ci		dr->func[i].rev = relations->func[i].rev;
280562306a36Sopenharmony_ci		dr->func[i].prog_intf = relations->func[i].prog_intf;
280662306a36Sopenharmony_ci		dr->func[i].subclass = relations->func[i].subclass;
280762306a36Sopenharmony_ci		dr->func[i].base_class = relations->func[i].base_class;
280862306a36Sopenharmony_ci		dr->func[i].subsystem_id = relations->func[i].subsystem_id;
280962306a36Sopenharmony_ci		dr->func[i].win_slot = relations->func[i].win_slot;
281062306a36Sopenharmony_ci		dr->func[i].ser = relations->func[i].ser;
281162306a36Sopenharmony_ci		dr->func[i].flags = relations->func[i].flags;
281262306a36Sopenharmony_ci		dr->func[i].virtual_numa_node =
281362306a36Sopenharmony_ci			relations->func[i].virtual_numa_node;
281462306a36Sopenharmony_ci	}
281562306a36Sopenharmony_ci
281662306a36Sopenharmony_ci	if (hv_pci_start_relations_work(hbus, dr))
281762306a36Sopenharmony_ci		kfree(dr);
281862306a36Sopenharmony_ci}
281962306a36Sopenharmony_ci
282062306a36Sopenharmony_ci/**
282162306a36Sopenharmony_ci * hv_eject_device_work() - Asynchronously handles ejection
282262306a36Sopenharmony_ci * @work:	Work struct embedded in internal device struct
282362306a36Sopenharmony_ci *
282462306a36Sopenharmony_ci * This function handles ejecting a device.  Windows will
282562306a36Sopenharmony_ci * attempt to gracefully eject a device, waiting 60 seconds to
282662306a36Sopenharmony_ci * hear back from the guest OS that this completed successfully.
282762306a36Sopenharmony_ci * If this timer expires, the device will be forcibly removed.
282862306a36Sopenharmony_ci */
282962306a36Sopenharmony_cistatic void hv_eject_device_work(struct work_struct *work)
283062306a36Sopenharmony_ci{
283162306a36Sopenharmony_ci	struct pci_eject_response *ejct_pkt;
283262306a36Sopenharmony_ci	struct hv_pcibus_device *hbus;
283362306a36Sopenharmony_ci	struct hv_pci_dev *hpdev;
283462306a36Sopenharmony_ci	struct pci_dev *pdev;
283562306a36Sopenharmony_ci	unsigned long flags;
283662306a36Sopenharmony_ci	int wslot;
283762306a36Sopenharmony_ci	struct {
283862306a36Sopenharmony_ci		struct pci_packet pkt;
283962306a36Sopenharmony_ci		u8 buffer[sizeof(struct pci_eject_response)];
284062306a36Sopenharmony_ci	} ctxt;
284162306a36Sopenharmony_ci
284262306a36Sopenharmony_ci	hpdev = container_of(work, struct hv_pci_dev, wrk);
284362306a36Sopenharmony_ci	hbus = hpdev->hbus;
284462306a36Sopenharmony_ci
284562306a36Sopenharmony_ci	mutex_lock(&hbus->state_lock);
284662306a36Sopenharmony_ci
284762306a36Sopenharmony_ci	/*
284862306a36Sopenharmony_ci	 * Ejection can come before or after the PCI bus has been set up, so
284962306a36Sopenharmony_ci	 * attempt to find it and tear down the bus state, if it exists.  This
285062306a36Sopenharmony_ci	 * must be done without constructs like pci_domain_nr(hbus->bridge->bus)
285162306a36Sopenharmony_ci	 * because hbus->bridge->bus may not exist yet.
285262306a36Sopenharmony_ci	 */
285362306a36Sopenharmony_ci	wslot = wslot_to_devfn(hpdev->desc.win_slot.slot);
285462306a36Sopenharmony_ci	pdev = pci_get_domain_bus_and_slot(hbus->bridge->domain_nr, 0, wslot);
285562306a36Sopenharmony_ci	if (pdev) {
285662306a36Sopenharmony_ci		pci_lock_rescan_remove();
285762306a36Sopenharmony_ci		pci_stop_and_remove_bus_device(pdev);
285862306a36Sopenharmony_ci		pci_dev_put(pdev);
285962306a36Sopenharmony_ci		pci_unlock_rescan_remove();
286062306a36Sopenharmony_ci	}
286162306a36Sopenharmony_ci
286262306a36Sopenharmony_ci	spin_lock_irqsave(&hbus->device_list_lock, flags);
286362306a36Sopenharmony_ci	list_del(&hpdev->list_entry);
286462306a36Sopenharmony_ci	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
286562306a36Sopenharmony_ci
286662306a36Sopenharmony_ci	if (hpdev->pci_slot)
286762306a36Sopenharmony_ci		pci_destroy_slot(hpdev->pci_slot);
286862306a36Sopenharmony_ci
286962306a36Sopenharmony_ci	memset(&ctxt, 0, sizeof(ctxt));
287062306a36Sopenharmony_ci	ejct_pkt = (struct pci_eject_response *)&ctxt.pkt.message;
287162306a36Sopenharmony_ci	ejct_pkt->message_type.type = PCI_EJECTION_COMPLETE;
287262306a36Sopenharmony_ci	ejct_pkt->wslot.slot = hpdev->desc.win_slot.slot;
287362306a36Sopenharmony_ci	vmbus_sendpacket(hbus->hdev->channel, ejct_pkt,
287462306a36Sopenharmony_ci			 sizeof(*ejct_pkt), 0,
287562306a36Sopenharmony_ci			 VM_PKT_DATA_INBAND, 0);
287662306a36Sopenharmony_ci
287762306a36Sopenharmony_ci	/* For the get_pcichild() in hv_pci_eject_device() */
287862306a36Sopenharmony_ci	put_pcichild(hpdev);
287962306a36Sopenharmony_ci	/* For the two refs got in new_pcichild_device() */
288062306a36Sopenharmony_ci	put_pcichild(hpdev);
288162306a36Sopenharmony_ci	put_pcichild(hpdev);
288262306a36Sopenharmony_ci	/* hpdev has been freed. Do not use it any more. */
288362306a36Sopenharmony_ci
288462306a36Sopenharmony_ci	mutex_unlock(&hbus->state_lock);
288562306a36Sopenharmony_ci}
288662306a36Sopenharmony_ci
288762306a36Sopenharmony_ci/**
288862306a36Sopenharmony_ci * hv_pci_eject_device() - Handles device ejection
288962306a36Sopenharmony_ci * @hpdev:	Internal device tracking struct
289062306a36Sopenharmony_ci *
289162306a36Sopenharmony_ci * This function is invoked when an ejection packet arrives.  It
289262306a36Sopenharmony_ci * just schedules work so that we don't re-enter the packet
289362306a36Sopenharmony_ci * delivery code handling the ejection.
289462306a36Sopenharmony_ci */
289562306a36Sopenharmony_cistatic void hv_pci_eject_device(struct hv_pci_dev *hpdev)
289662306a36Sopenharmony_ci{
289762306a36Sopenharmony_ci	struct hv_pcibus_device *hbus = hpdev->hbus;
289862306a36Sopenharmony_ci	struct hv_device *hdev = hbus->hdev;
289962306a36Sopenharmony_ci
290062306a36Sopenharmony_ci	if (hbus->state == hv_pcibus_removing) {
290162306a36Sopenharmony_ci		dev_info(&hdev->device, "PCI VMBus EJECT: ignored\n");
290262306a36Sopenharmony_ci		return;
290362306a36Sopenharmony_ci	}
290462306a36Sopenharmony_ci
290562306a36Sopenharmony_ci	get_pcichild(hpdev);
290662306a36Sopenharmony_ci	INIT_WORK(&hpdev->wrk, hv_eject_device_work);
290762306a36Sopenharmony_ci	queue_work(hbus->wq, &hpdev->wrk);
290862306a36Sopenharmony_ci}
290962306a36Sopenharmony_ci
291062306a36Sopenharmony_ci/**
291162306a36Sopenharmony_ci * hv_pci_onchannelcallback() - Handles incoming packets
291262306a36Sopenharmony_ci * @context:	Internal bus tracking struct
291362306a36Sopenharmony_ci *
291462306a36Sopenharmony_ci * This function is invoked whenever the host sends a packet to
291562306a36Sopenharmony_ci * this channel (which is private to this root PCI bus).
291662306a36Sopenharmony_ci */
291762306a36Sopenharmony_cistatic void hv_pci_onchannelcallback(void *context)
291862306a36Sopenharmony_ci{
291962306a36Sopenharmony_ci	const int packet_size = 0x100;
292062306a36Sopenharmony_ci	int ret;
292162306a36Sopenharmony_ci	struct hv_pcibus_device *hbus = context;
292262306a36Sopenharmony_ci	struct vmbus_channel *chan = hbus->hdev->channel;
292362306a36Sopenharmony_ci	u32 bytes_recvd;
292462306a36Sopenharmony_ci	u64 req_id, req_addr;
292562306a36Sopenharmony_ci	struct vmpacket_descriptor *desc;
292662306a36Sopenharmony_ci	unsigned char *buffer;
292762306a36Sopenharmony_ci	int bufferlen = packet_size;
292862306a36Sopenharmony_ci	struct pci_packet *comp_packet;
292962306a36Sopenharmony_ci	struct pci_response *response;
293062306a36Sopenharmony_ci	struct pci_incoming_message *new_message;
293162306a36Sopenharmony_ci	struct pci_bus_relations *bus_rel;
293262306a36Sopenharmony_ci	struct pci_bus_relations2 *bus_rel2;
293362306a36Sopenharmony_ci	struct pci_dev_inval_block *inval;
293462306a36Sopenharmony_ci	struct pci_dev_incoming *dev_message;
293562306a36Sopenharmony_ci	struct hv_pci_dev *hpdev;
293662306a36Sopenharmony_ci	unsigned long flags;
293762306a36Sopenharmony_ci
293862306a36Sopenharmony_ci	buffer = kmalloc(bufferlen, GFP_ATOMIC);
293962306a36Sopenharmony_ci	if (!buffer)
294062306a36Sopenharmony_ci		return;
294162306a36Sopenharmony_ci
294262306a36Sopenharmony_ci	while (1) {
294362306a36Sopenharmony_ci		ret = vmbus_recvpacket_raw(chan, buffer, bufferlen,
294462306a36Sopenharmony_ci					   &bytes_recvd, &req_id);
294562306a36Sopenharmony_ci
294662306a36Sopenharmony_ci		if (ret == -ENOBUFS) {
294762306a36Sopenharmony_ci			kfree(buffer);
294862306a36Sopenharmony_ci			/* Handle large packet */
294962306a36Sopenharmony_ci			bufferlen = bytes_recvd;
295062306a36Sopenharmony_ci			buffer = kmalloc(bytes_recvd, GFP_ATOMIC);
295162306a36Sopenharmony_ci			if (!buffer)
295262306a36Sopenharmony_ci				return;
295362306a36Sopenharmony_ci			continue;
295462306a36Sopenharmony_ci		}
295562306a36Sopenharmony_ci
295662306a36Sopenharmony_ci		/* Zero length indicates there are no more packets. */
295762306a36Sopenharmony_ci		if (ret || !bytes_recvd)
295862306a36Sopenharmony_ci			break;
295962306a36Sopenharmony_ci
296062306a36Sopenharmony_ci		/*
296162306a36Sopenharmony_ci		 * All incoming packets must be at least as large as a
296262306a36Sopenharmony_ci		 * response.
296362306a36Sopenharmony_ci		 */
296462306a36Sopenharmony_ci		if (bytes_recvd <= sizeof(struct pci_response))
296562306a36Sopenharmony_ci			continue;
296662306a36Sopenharmony_ci		desc = (struct vmpacket_descriptor *)buffer;
296762306a36Sopenharmony_ci
296862306a36Sopenharmony_ci		switch (desc->type) {
296962306a36Sopenharmony_ci		case VM_PKT_COMP:
297062306a36Sopenharmony_ci
297162306a36Sopenharmony_ci			lock_requestor(chan, flags);
297262306a36Sopenharmony_ci			req_addr = __vmbus_request_addr_match(chan, req_id,
297362306a36Sopenharmony_ci							      VMBUS_RQST_ADDR_ANY);
297462306a36Sopenharmony_ci			if (req_addr == VMBUS_RQST_ERROR) {
297562306a36Sopenharmony_ci				unlock_requestor(chan, flags);
297662306a36Sopenharmony_ci				dev_err(&hbus->hdev->device,
297762306a36Sopenharmony_ci					"Invalid transaction ID %llx\n",
297862306a36Sopenharmony_ci					req_id);
297962306a36Sopenharmony_ci				break;
298062306a36Sopenharmony_ci			}
298162306a36Sopenharmony_ci			comp_packet = (struct pci_packet *)req_addr;
298262306a36Sopenharmony_ci			response = (struct pci_response *)buffer;
298362306a36Sopenharmony_ci			/*
298462306a36Sopenharmony_ci			 * Call ->completion_func() within the critical section to make
298562306a36Sopenharmony_ci			 * sure that the packet pointer is still valid during the call:
298662306a36Sopenharmony_ci			 * here 'valid' means that there's a task still waiting for the
298762306a36Sopenharmony_ci			 * completion, and that the packet data is still on the waiting
298862306a36Sopenharmony_ci			 * task's stack.  Cf. hv_compose_msi_msg().
298962306a36Sopenharmony_ci			 */
299062306a36Sopenharmony_ci			comp_packet->completion_func(comp_packet->compl_ctxt,
299162306a36Sopenharmony_ci						     response,
299262306a36Sopenharmony_ci						     bytes_recvd);
299362306a36Sopenharmony_ci			unlock_requestor(chan, flags);
299462306a36Sopenharmony_ci			break;
299562306a36Sopenharmony_ci
299662306a36Sopenharmony_ci		case VM_PKT_DATA_INBAND:
299762306a36Sopenharmony_ci
299862306a36Sopenharmony_ci			new_message = (struct pci_incoming_message *)buffer;
299962306a36Sopenharmony_ci			switch (new_message->message_type.type) {
300062306a36Sopenharmony_ci			case PCI_BUS_RELATIONS:
300162306a36Sopenharmony_ci
300262306a36Sopenharmony_ci				bus_rel = (struct pci_bus_relations *)buffer;
300362306a36Sopenharmony_ci				if (bytes_recvd < sizeof(*bus_rel) ||
300462306a36Sopenharmony_ci				    bytes_recvd <
300562306a36Sopenharmony_ci					struct_size(bus_rel, func,
300662306a36Sopenharmony_ci						    bus_rel->device_count)) {
300762306a36Sopenharmony_ci					dev_err(&hbus->hdev->device,
300862306a36Sopenharmony_ci						"bus relations too small\n");
300962306a36Sopenharmony_ci					break;
301062306a36Sopenharmony_ci				}
301162306a36Sopenharmony_ci
301262306a36Sopenharmony_ci				hv_pci_devices_present(hbus, bus_rel);
301362306a36Sopenharmony_ci				break;
301462306a36Sopenharmony_ci
301562306a36Sopenharmony_ci			case PCI_BUS_RELATIONS2:
301662306a36Sopenharmony_ci
301762306a36Sopenharmony_ci				bus_rel2 = (struct pci_bus_relations2 *)buffer;
301862306a36Sopenharmony_ci				if (bytes_recvd < sizeof(*bus_rel2) ||
301962306a36Sopenharmony_ci				    bytes_recvd <
302062306a36Sopenharmony_ci					struct_size(bus_rel2, func,
302162306a36Sopenharmony_ci						    bus_rel2->device_count)) {
302262306a36Sopenharmony_ci					dev_err(&hbus->hdev->device,
302362306a36Sopenharmony_ci						"bus relations v2 too small\n");
302462306a36Sopenharmony_ci					break;
302562306a36Sopenharmony_ci				}
302662306a36Sopenharmony_ci
302762306a36Sopenharmony_ci				hv_pci_devices_present2(hbus, bus_rel2);
302862306a36Sopenharmony_ci				break;
302962306a36Sopenharmony_ci
303062306a36Sopenharmony_ci			case PCI_EJECT:
303162306a36Sopenharmony_ci
303262306a36Sopenharmony_ci				dev_message = (struct pci_dev_incoming *)buffer;
303362306a36Sopenharmony_ci				if (bytes_recvd < sizeof(*dev_message)) {
303462306a36Sopenharmony_ci					dev_err(&hbus->hdev->device,
303562306a36Sopenharmony_ci						"eject message too small\n");
303662306a36Sopenharmony_ci					break;
303762306a36Sopenharmony_ci				}
303862306a36Sopenharmony_ci				hpdev = get_pcichild_wslot(hbus,
303962306a36Sopenharmony_ci						      dev_message->wslot.slot);
304062306a36Sopenharmony_ci				if (hpdev) {
304162306a36Sopenharmony_ci					hv_pci_eject_device(hpdev);
304262306a36Sopenharmony_ci					put_pcichild(hpdev);
304362306a36Sopenharmony_ci				}
304462306a36Sopenharmony_ci				break;
304562306a36Sopenharmony_ci
304662306a36Sopenharmony_ci			case PCI_INVALIDATE_BLOCK:
304762306a36Sopenharmony_ci
304862306a36Sopenharmony_ci				inval = (struct pci_dev_inval_block *)buffer;
304962306a36Sopenharmony_ci				if (bytes_recvd < sizeof(*inval)) {
305062306a36Sopenharmony_ci					dev_err(&hbus->hdev->device,
305162306a36Sopenharmony_ci						"invalidate message too small\n");
305262306a36Sopenharmony_ci					break;
305362306a36Sopenharmony_ci				}
305462306a36Sopenharmony_ci				hpdev = get_pcichild_wslot(hbus,
305562306a36Sopenharmony_ci							   inval->wslot.slot);
305662306a36Sopenharmony_ci				if (hpdev) {
305762306a36Sopenharmony_ci					if (hpdev->block_invalidate) {
305862306a36Sopenharmony_ci						hpdev->block_invalidate(
305962306a36Sopenharmony_ci						    hpdev->invalidate_context,
306062306a36Sopenharmony_ci						    inval->block_mask);
306162306a36Sopenharmony_ci					}
306262306a36Sopenharmony_ci					put_pcichild(hpdev);
306362306a36Sopenharmony_ci				}
306462306a36Sopenharmony_ci				break;
306562306a36Sopenharmony_ci
306662306a36Sopenharmony_ci			default:
306762306a36Sopenharmony_ci				dev_warn(&hbus->hdev->device,
306862306a36Sopenharmony_ci					"Unimplemented protocol message %x\n",
306962306a36Sopenharmony_ci					new_message->message_type.type);
307062306a36Sopenharmony_ci				break;
307162306a36Sopenharmony_ci			}
307262306a36Sopenharmony_ci			break;
307362306a36Sopenharmony_ci
307462306a36Sopenharmony_ci		default:
307562306a36Sopenharmony_ci			dev_err(&hbus->hdev->device,
307662306a36Sopenharmony_ci				"unhandled packet type %d, tid %llx len %d\n",
307762306a36Sopenharmony_ci				desc->type, req_id, bytes_recvd);
307862306a36Sopenharmony_ci			break;
307962306a36Sopenharmony_ci		}
308062306a36Sopenharmony_ci	}
308162306a36Sopenharmony_ci
308262306a36Sopenharmony_ci	kfree(buffer);
308362306a36Sopenharmony_ci}
308462306a36Sopenharmony_ci
308562306a36Sopenharmony_ci/**
308662306a36Sopenharmony_ci * hv_pci_protocol_negotiation() - Set up protocol
308762306a36Sopenharmony_ci * @hdev:		VMBus's tracking struct for this root PCI bus.
308862306a36Sopenharmony_ci * @version:		Array of supported channel protocol versions in
308962306a36Sopenharmony_ci *			the order of probing - highest go first.
309062306a36Sopenharmony_ci * @num_version:	Number of elements in the version array.
309162306a36Sopenharmony_ci *
309262306a36Sopenharmony_ci * This driver is intended to support running on Windows 10
309362306a36Sopenharmony_ci * (server) and later versions. It will not run on earlier
309462306a36Sopenharmony_ci * versions, as they assume that many of the operations which
309562306a36Sopenharmony_ci * Linux needs accomplished with a spinlock held were done via
309662306a36Sopenharmony_ci * asynchronous messaging via VMBus.  Windows 10 increases the
309762306a36Sopenharmony_ci * surface area of PCI emulation so that these actions can take
309862306a36Sopenharmony_ci * place by suspending a virtual processor for their duration.
309962306a36Sopenharmony_ci *
310062306a36Sopenharmony_ci * This function negotiates the channel protocol version,
310162306a36Sopenharmony_ci * failing if the host doesn't support the necessary protocol
310262306a36Sopenharmony_ci * level.
310362306a36Sopenharmony_ci */
310462306a36Sopenharmony_cistatic int hv_pci_protocol_negotiation(struct hv_device *hdev,
310562306a36Sopenharmony_ci				       enum pci_protocol_version_t version[],
310662306a36Sopenharmony_ci				       int num_version)
310762306a36Sopenharmony_ci{
310862306a36Sopenharmony_ci	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
310962306a36Sopenharmony_ci	struct pci_version_request *version_req;
311062306a36Sopenharmony_ci	struct hv_pci_compl comp_pkt;
311162306a36Sopenharmony_ci	struct pci_packet *pkt;
311262306a36Sopenharmony_ci	int ret;
311362306a36Sopenharmony_ci	int i;
311462306a36Sopenharmony_ci
311562306a36Sopenharmony_ci	/*
311662306a36Sopenharmony_ci	 * Initiate the handshake with the host and negotiate
311762306a36Sopenharmony_ci	 * a version that the host can support. We start with the
311862306a36Sopenharmony_ci	 * highest version number and go down if the host cannot
311962306a36Sopenharmony_ci	 * support it.
312062306a36Sopenharmony_ci	 */
312162306a36Sopenharmony_ci	pkt = kzalloc(sizeof(*pkt) + sizeof(*version_req), GFP_KERNEL);
312262306a36Sopenharmony_ci	if (!pkt)
312362306a36Sopenharmony_ci		return -ENOMEM;
312462306a36Sopenharmony_ci
312562306a36Sopenharmony_ci	init_completion(&comp_pkt.host_event);
312662306a36Sopenharmony_ci	pkt->completion_func = hv_pci_generic_compl;
312762306a36Sopenharmony_ci	pkt->compl_ctxt = &comp_pkt;
312862306a36Sopenharmony_ci	version_req = (struct pci_version_request *)&pkt->message;
312962306a36Sopenharmony_ci	version_req->message_type.type = PCI_QUERY_PROTOCOL_VERSION;
313062306a36Sopenharmony_ci
313162306a36Sopenharmony_ci	for (i = 0; i < num_version; i++) {
313262306a36Sopenharmony_ci		version_req->protocol_version = version[i];
313362306a36Sopenharmony_ci		ret = vmbus_sendpacket(hdev->channel, version_req,
313462306a36Sopenharmony_ci				sizeof(struct pci_version_request),
313562306a36Sopenharmony_ci				(unsigned long)pkt, VM_PKT_DATA_INBAND,
313662306a36Sopenharmony_ci				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
313762306a36Sopenharmony_ci		if (!ret)
313862306a36Sopenharmony_ci			ret = wait_for_response(hdev, &comp_pkt.host_event);
313962306a36Sopenharmony_ci
314062306a36Sopenharmony_ci		if (ret) {
314162306a36Sopenharmony_ci			dev_err(&hdev->device,
314262306a36Sopenharmony_ci				"PCI Pass-through VSP failed to request version: %d",
314362306a36Sopenharmony_ci				ret);
314462306a36Sopenharmony_ci			goto exit;
314562306a36Sopenharmony_ci		}
314662306a36Sopenharmony_ci
314762306a36Sopenharmony_ci		if (comp_pkt.completion_status >= 0) {
314862306a36Sopenharmony_ci			hbus->protocol_version = version[i];
314962306a36Sopenharmony_ci			dev_info(&hdev->device,
315062306a36Sopenharmony_ci				"PCI VMBus probing: Using version %#x\n",
315162306a36Sopenharmony_ci				hbus->protocol_version);
315262306a36Sopenharmony_ci			goto exit;
315362306a36Sopenharmony_ci		}
315462306a36Sopenharmony_ci
315562306a36Sopenharmony_ci		if (comp_pkt.completion_status != STATUS_REVISION_MISMATCH) {
315662306a36Sopenharmony_ci			dev_err(&hdev->device,
315762306a36Sopenharmony_ci				"PCI Pass-through VSP failed version request: %#x",
315862306a36Sopenharmony_ci				comp_pkt.completion_status);
315962306a36Sopenharmony_ci			ret = -EPROTO;
316062306a36Sopenharmony_ci			goto exit;
316162306a36Sopenharmony_ci		}
316262306a36Sopenharmony_ci
316362306a36Sopenharmony_ci		reinit_completion(&comp_pkt.host_event);
316462306a36Sopenharmony_ci	}
316562306a36Sopenharmony_ci
316662306a36Sopenharmony_ci	dev_err(&hdev->device,
316762306a36Sopenharmony_ci		"PCI pass-through VSP failed to find supported version");
316862306a36Sopenharmony_ci	ret = -EPROTO;
316962306a36Sopenharmony_ci
317062306a36Sopenharmony_ciexit:
317162306a36Sopenharmony_ci	kfree(pkt);
317262306a36Sopenharmony_ci	return ret;
317362306a36Sopenharmony_ci}
317462306a36Sopenharmony_ci
317562306a36Sopenharmony_ci/**
317662306a36Sopenharmony_ci * hv_pci_free_bridge_windows() - Release memory regions for the
317762306a36Sopenharmony_ci * bus
317862306a36Sopenharmony_ci * @hbus:	Root PCI bus, as understood by this driver
317962306a36Sopenharmony_ci */
318062306a36Sopenharmony_cistatic void hv_pci_free_bridge_windows(struct hv_pcibus_device *hbus)
318162306a36Sopenharmony_ci{
318262306a36Sopenharmony_ci	/*
318362306a36Sopenharmony_ci	 * Set the resources back to the way they looked when they
318462306a36Sopenharmony_ci	 * were allocated by setting IORESOURCE_BUSY again.
318562306a36Sopenharmony_ci	 */
318662306a36Sopenharmony_ci
318762306a36Sopenharmony_ci	if (hbus->low_mmio_space && hbus->low_mmio_res) {
318862306a36Sopenharmony_ci		hbus->low_mmio_res->flags |= IORESOURCE_BUSY;
318962306a36Sopenharmony_ci		vmbus_free_mmio(hbus->low_mmio_res->start,
319062306a36Sopenharmony_ci				resource_size(hbus->low_mmio_res));
319162306a36Sopenharmony_ci	}
319262306a36Sopenharmony_ci
319362306a36Sopenharmony_ci	if (hbus->high_mmio_space && hbus->high_mmio_res) {
319462306a36Sopenharmony_ci		hbus->high_mmio_res->flags |= IORESOURCE_BUSY;
319562306a36Sopenharmony_ci		vmbus_free_mmio(hbus->high_mmio_res->start,
319662306a36Sopenharmony_ci				resource_size(hbus->high_mmio_res));
319762306a36Sopenharmony_ci	}
319862306a36Sopenharmony_ci}
319962306a36Sopenharmony_ci
320062306a36Sopenharmony_ci/**
320162306a36Sopenharmony_ci * hv_pci_allocate_bridge_windows() - Allocate memory regions
320262306a36Sopenharmony_ci * for the bus
320362306a36Sopenharmony_ci * @hbus:	Root PCI bus, as understood by this driver
320462306a36Sopenharmony_ci *
320562306a36Sopenharmony_ci * This function calls vmbus_allocate_mmio(), which is itself a
320662306a36Sopenharmony_ci * bit of a compromise.  Ideally, we might change the pnp layer
320762306a36Sopenharmony_ci * in the kernel such that it comprehends either PCI devices
320862306a36Sopenharmony_ci * which are "grandchildren of ACPI," with some intermediate bus
320962306a36Sopenharmony_ci * node (in this case, VMBus) or change it such that it
321062306a36Sopenharmony_ci * understands VMBus.  The pnp layer, however, has been declared
321162306a36Sopenharmony_ci * deprecated, and not subject to change.
321262306a36Sopenharmony_ci *
321362306a36Sopenharmony_ci * The workaround, implemented here, is to ask VMBus to allocate
321462306a36Sopenharmony_ci * MMIO space for this bus.  VMBus itself knows which ranges are
321562306a36Sopenharmony_ci * appropriate by looking at its own ACPI objects.  Then, after
321662306a36Sopenharmony_ci * these ranges are claimed, they're modified to look like they
321762306a36Sopenharmony_ci * would have looked if the ACPI and pnp code had allocated
321862306a36Sopenharmony_ci * bridge windows.  These descriptors have to exist in this form
321962306a36Sopenharmony_ci * in order to satisfy the code which will get invoked when the
322062306a36Sopenharmony_ci * endpoint PCI function driver calls request_mem_region() or
322162306a36Sopenharmony_ci * request_mem_region_exclusive().
322262306a36Sopenharmony_ci *
322362306a36Sopenharmony_ci * Return: 0 on success, -errno on failure
322462306a36Sopenharmony_ci */
322562306a36Sopenharmony_cistatic int hv_pci_allocate_bridge_windows(struct hv_pcibus_device *hbus)
322662306a36Sopenharmony_ci{
322762306a36Sopenharmony_ci	resource_size_t align;
322862306a36Sopenharmony_ci	int ret;
322962306a36Sopenharmony_ci
323062306a36Sopenharmony_ci	if (hbus->low_mmio_space) {
323162306a36Sopenharmony_ci		align = 1ULL << (63 - __builtin_clzll(hbus->low_mmio_space));
323262306a36Sopenharmony_ci		ret = vmbus_allocate_mmio(&hbus->low_mmio_res, hbus->hdev, 0,
323362306a36Sopenharmony_ci					  (u64)(u32)0xffffffff,
323462306a36Sopenharmony_ci					  hbus->low_mmio_space,
323562306a36Sopenharmony_ci					  align, false);
323662306a36Sopenharmony_ci		if (ret) {
323762306a36Sopenharmony_ci			dev_err(&hbus->hdev->device,
323862306a36Sopenharmony_ci				"Need %#llx of low MMIO space. Consider reconfiguring the VM.\n",
323962306a36Sopenharmony_ci				hbus->low_mmio_space);
324062306a36Sopenharmony_ci			return ret;
324162306a36Sopenharmony_ci		}
324262306a36Sopenharmony_ci
324362306a36Sopenharmony_ci		/* Modify this resource to become a bridge window. */
324462306a36Sopenharmony_ci		hbus->low_mmio_res->flags |= IORESOURCE_WINDOW;
324562306a36Sopenharmony_ci		hbus->low_mmio_res->flags &= ~IORESOURCE_BUSY;
324662306a36Sopenharmony_ci		pci_add_resource(&hbus->bridge->windows, hbus->low_mmio_res);
324762306a36Sopenharmony_ci	}
324862306a36Sopenharmony_ci
324962306a36Sopenharmony_ci	if (hbus->high_mmio_space) {
325062306a36Sopenharmony_ci		align = 1ULL << (63 - __builtin_clzll(hbus->high_mmio_space));
325162306a36Sopenharmony_ci		ret = vmbus_allocate_mmio(&hbus->high_mmio_res, hbus->hdev,
325262306a36Sopenharmony_ci					  0x100000000, -1,
325362306a36Sopenharmony_ci					  hbus->high_mmio_space, align,
325462306a36Sopenharmony_ci					  false);
325562306a36Sopenharmony_ci		if (ret) {
325662306a36Sopenharmony_ci			dev_err(&hbus->hdev->device,
325762306a36Sopenharmony_ci				"Need %#llx of high MMIO space. Consider reconfiguring the VM.\n",
325862306a36Sopenharmony_ci				hbus->high_mmio_space);
325962306a36Sopenharmony_ci			goto release_low_mmio;
326062306a36Sopenharmony_ci		}
326162306a36Sopenharmony_ci
326262306a36Sopenharmony_ci		/* Modify this resource to become a bridge window. */
326362306a36Sopenharmony_ci		hbus->high_mmio_res->flags |= IORESOURCE_WINDOW;
326462306a36Sopenharmony_ci		hbus->high_mmio_res->flags &= ~IORESOURCE_BUSY;
326562306a36Sopenharmony_ci		pci_add_resource(&hbus->bridge->windows, hbus->high_mmio_res);
326662306a36Sopenharmony_ci	}
326762306a36Sopenharmony_ci
326862306a36Sopenharmony_ci	return 0;
326962306a36Sopenharmony_ci
327062306a36Sopenharmony_cirelease_low_mmio:
327162306a36Sopenharmony_ci	if (hbus->low_mmio_res) {
327262306a36Sopenharmony_ci		vmbus_free_mmio(hbus->low_mmio_res->start,
327362306a36Sopenharmony_ci				resource_size(hbus->low_mmio_res));
327462306a36Sopenharmony_ci	}
327562306a36Sopenharmony_ci
327662306a36Sopenharmony_ci	return ret;
327762306a36Sopenharmony_ci}
327862306a36Sopenharmony_ci
327962306a36Sopenharmony_ci/**
328062306a36Sopenharmony_ci * hv_allocate_config_window() - Find MMIO space for PCI Config
328162306a36Sopenharmony_ci * @hbus:	Root PCI bus, as understood by this driver
328262306a36Sopenharmony_ci *
328362306a36Sopenharmony_ci * This function claims memory-mapped I/O space for accessing
328462306a36Sopenharmony_ci * configuration space for the functions on this bus.
328562306a36Sopenharmony_ci *
328662306a36Sopenharmony_ci * Return: 0 on success, -errno on failure
328762306a36Sopenharmony_ci */
328862306a36Sopenharmony_cistatic int hv_allocate_config_window(struct hv_pcibus_device *hbus)
328962306a36Sopenharmony_ci{
329062306a36Sopenharmony_ci	int ret;
329162306a36Sopenharmony_ci
329262306a36Sopenharmony_ci	/*
329362306a36Sopenharmony_ci	 * Set up a region of MMIO space to use for accessing configuration
329462306a36Sopenharmony_ci	 * space.
329562306a36Sopenharmony_ci	 */
329662306a36Sopenharmony_ci	ret = vmbus_allocate_mmio(&hbus->mem_config, hbus->hdev, 0, -1,
329762306a36Sopenharmony_ci				  PCI_CONFIG_MMIO_LENGTH, 0x1000, false);
329862306a36Sopenharmony_ci	if (ret)
329962306a36Sopenharmony_ci		return ret;
330062306a36Sopenharmony_ci
330162306a36Sopenharmony_ci	/*
330262306a36Sopenharmony_ci	 * vmbus_allocate_mmio() gets used for allocating both device endpoint
330362306a36Sopenharmony_ci	 * resource claims (those which cannot be overlapped) and the ranges
330462306a36Sopenharmony_ci	 * which are valid for the children of this bus, which are intended
330562306a36Sopenharmony_ci	 * to be overlapped by those children.  Set the flag on this claim
330662306a36Sopenharmony_ci	 * meaning that this region can't be overlapped.
330762306a36Sopenharmony_ci	 */
330862306a36Sopenharmony_ci
330962306a36Sopenharmony_ci	hbus->mem_config->flags |= IORESOURCE_BUSY;
331062306a36Sopenharmony_ci
331162306a36Sopenharmony_ci	return 0;
331262306a36Sopenharmony_ci}
331362306a36Sopenharmony_ci
331462306a36Sopenharmony_cistatic void hv_free_config_window(struct hv_pcibus_device *hbus)
331562306a36Sopenharmony_ci{
331662306a36Sopenharmony_ci	vmbus_free_mmio(hbus->mem_config->start, PCI_CONFIG_MMIO_LENGTH);
331762306a36Sopenharmony_ci}
331862306a36Sopenharmony_ci
331962306a36Sopenharmony_cistatic int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs);
332062306a36Sopenharmony_ci
332162306a36Sopenharmony_ci/**
332262306a36Sopenharmony_ci * hv_pci_enter_d0() - Bring the "bus" into the D0 power state
332362306a36Sopenharmony_ci * @hdev:	VMBus's tracking struct for this root PCI bus
332462306a36Sopenharmony_ci *
332562306a36Sopenharmony_ci * Return: 0 on success, -errno on failure
332662306a36Sopenharmony_ci */
332762306a36Sopenharmony_cistatic int hv_pci_enter_d0(struct hv_device *hdev)
332862306a36Sopenharmony_ci{
332962306a36Sopenharmony_ci	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
333062306a36Sopenharmony_ci	struct pci_bus_d0_entry *d0_entry;
333162306a36Sopenharmony_ci	struct hv_pci_compl comp_pkt;
333262306a36Sopenharmony_ci	struct pci_packet *pkt;
333362306a36Sopenharmony_ci	bool retry = true;
333462306a36Sopenharmony_ci	int ret;
333562306a36Sopenharmony_ci
333662306a36Sopenharmony_cienter_d0_retry:
333762306a36Sopenharmony_ci	/*
333862306a36Sopenharmony_ci	 * Tell the host that the bus is ready to use, and moved into the
333962306a36Sopenharmony_ci	 * powered-on state.  This includes telling the host which region
334062306a36Sopenharmony_ci	 * of memory-mapped I/O space has been chosen for configuration space
334162306a36Sopenharmony_ci	 * access.
334262306a36Sopenharmony_ci	 */
334362306a36Sopenharmony_ci	pkt = kzalloc(sizeof(*pkt) + sizeof(*d0_entry), GFP_KERNEL);
334462306a36Sopenharmony_ci	if (!pkt)
334562306a36Sopenharmony_ci		return -ENOMEM;
334662306a36Sopenharmony_ci
334762306a36Sopenharmony_ci	init_completion(&comp_pkt.host_event);
334862306a36Sopenharmony_ci	pkt->completion_func = hv_pci_generic_compl;
334962306a36Sopenharmony_ci	pkt->compl_ctxt = &comp_pkt;
335062306a36Sopenharmony_ci	d0_entry = (struct pci_bus_d0_entry *)&pkt->message;
335162306a36Sopenharmony_ci	d0_entry->message_type.type = PCI_BUS_D0ENTRY;
335262306a36Sopenharmony_ci	d0_entry->mmio_base = hbus->mem_config->start;
335362306a36Sopenharmony_ci
335462306a36Sopenharmony_ci	ret = vmbus_sendpacket(hdev->channel, d0_entry, sizeof(*d0_entry),
335562306a36Sopenharmony_ci			       (unsigned long)pkt, VM_PKT_DATA_INBAND,
335662306a36Sopenharmony_ci			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
335762306a36Sopenharmony_ci	if (!ret)
335862306a36Sopenharmony_ci		ret = wait_for_response(hdev, &comp_pkt.host_event);
335962306a36Sopenharmony_ci
336062306a36Sopenharmony_ci	if (ret)
336162306a36Sopenharmony_ci		goto exit;
336262306a36Sopenharmony_ci
336362306a36Sopenharmony_ci	/*
336462306a36Sopenharmony_ci	 * In certain case (Kdump) the pci device of interest was
336562306a36Sopenharmony_ci	 * not cleanly shut down and resource is still held on host
336662306a36Sopenharmony_ci	 * side, the host could return invalid device status.
336762306a36Sopenharmony_ci	 * We need to explicitly request host to release the resource
336862306a36Sopenharmony_ci	 * and try to enter D0 again.
336962306a36Sopenharmony_ci	 */
337062306a36Sopenharmony_ci	if (comp_pkt.completion_status < 0 && retry) {
337162306a36Sopenharmony_ci		retry = false;
337262306a36Sopenharmony_ci
337362306a36Sopenharmony_ci		dev_err(&hdev->device, "Retrying D0 Entry\n");
337462306a36Sopenharmony_ci
337562306a36Sopenharmony_ci		/*
337662306a36Sopenharmony_ci		 * Hv_pci_bus_exit() calls hv_send_resource_released()
337762306a36Sopenharmony_ci		 * to free up resources of its child devices.
337862306a36Sopenharmony_ci		 * In the kdump kernel we need to set the
337962306a36Sopenharmony_ci		 * wslot_res_allocated to 255 so it scans all child
338062306a36Sopenharmony_ci		 * devices to release resources allocated in the
338162306a36Sopenharmony_ci		 * normal kernel before panic happened.
338262306a36Sopenharmony_ci		 */
338362306a36Sopenharmony_ci		hbus->wslot_res_allocated = 255;
338462306a36Sopenharmony_ci
338562306a36Sopenharmony_ci		ret = hv_pci_bus_exit(hdev, true);
338662306a36Sopenharmony_ci
338762306a36Sopenharmony_ci		if (ret == 0) {
338862306a36Sopenharmony_ci			kfree(pkt);
338962306a36Sopenharmony_ci			goto enter_d0_retry;
339062306a36Sopenharmony_ci		}
339162306a36Sopenharmony_ci		dev_err(&hdev->device,
339262306a36Sopenharmony_ci			"Retrying D0 failed with ret %d\n", ret);
339362306a36Sopenharmony_ci	}
339462306a36Sopenharmony_ci
339562306a36Sopenharmony_ci	if (comp_pkt.completion_status < 0) {
339662306a36Sopenharmony_ci		dev_err(&hdev->device,
339762306a36Sopenharmony_ci			"PCI Pass-through VSP failed D0 Entry with status %x\n",
339862306a36Sopenharmony_ci			comp_pkt.completion_status);
339962306a36Sopenharmony_ci		ret = -EPROTO;
340062306a36Sopenharmony_ci		goto exit;
340162306a36Sopenharmony_ci	}
340262306a36Sopenharmony_ci
340362306a36Sopenharmony_ci	ret = 0;
340462306a36Sopenharmony_ci
340562306a36Sopenharmony_ciexit:
340662306a36Sopenharmony_ci	kfree(pkt);
340762306a36Sopenharmony_ci	return ret;
340862306a36Sopenharmony_ci}
340962306a36Sopenharmony_ci
341062306a36Sopenharmony_ci/**
341162306a36Sopenharmony_ci * hv_pci_query_relations() - Ask host to send list of child
341262306a36Sopenharmony_ci * devices
341362306a36Sopenharmony_ci * @hdev:	VMBus's tracking struct for this root PCI bus
341462306a36Sopenharmony_ci *
341562306a36Sopenharmony_ci * Return: 0 on success, -errno on failure
341662306a36Sopenharmony_ci */
341762306a36Sopenharmony_cistatic int hv_pci_query_relations(struct hv_device *hdev)
341862306a36Sopenharmony_ci{
341962306a36Sopenharmony_ci	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
342062306a36Sopenharmony_ci	struct pci_message message;
342162306a36Sopenharmony_ci	struct completion comp;
342262306a36Sopenharmony_ci	int ret;
342362306a36Sopenharmony_ci
342462306a36Sopenharmony_ci	/* Ask the host to send along the list of child devices */
342562306a36Sopenharmony_ci	init_completion(&comp);
342662306a36Sopenharmony_ci	if (cmpxchg(&hbus->survey_event, NULL, &comp))
342762306a36Sopenharmony_ci		return -ENOTEMPTY;
342862306a36Sopenharmony_ci
342962306a36Sopenharmony_ci	memset(&message, 0, sizeof(message));
343062306a36Sopenharmony_ci	message.type = PCI_QUERY_BUS_RELATIONS;
343162306a36Sopenharmony_ci
343262306a36Sopenharmony_ci	ret = vmbus_sendpacket(hdev->channel, &message, sizeof(message),
343362306a36Sopenharmony_ci			       0, VM_PKT_DATA_INBAND, 0);
343462306a36Sopenharmony_ci	if (!ret)
343562306a36Sopenharmony_ci		ret = wait_for_response(hdev, &comp);
343662306a36Sopenharmony_ci
343762306a36Sopenharmony_ci	/*
343862306a36Sopenharmony_ci	 * In the case of fast device addition/removal, it's possible that
343962306a36Sopenharmony_ci	 * vmbus_sendpacket() or wait_for_response() returns -ENODEV but we
344062306a36Sopenharmony_ci	 * already got a PCI_BUS_RELATIONS* message from the host and the
344162306a36Sopenharmony_ci	 * channel callback already scheduled a work to hbus->wq, which can be
344262306a36Sopenharmony_ci	 * running pci_devices_present_work() -> survey_child_resources() ->
344362306a36Sopenharmony_ci	 * complete(&hbus->survey_event), even after hv_pci_query_relations()
344462306a36Sopenharmony_ci	 * exits and the stack variable 'comp' is no longer valid; as a result,
344562306a36Sopenharmony_ci	 * a hang or a page fault may happen when the complete() calls
344662306a36Sopenharmony_ci	 * raw_spin_lock_irqsave(). Flush hbus->wq before we exit from
344762306a36Sopenharmony_ci	 * hv_pci_query_relations() to avoid the issues. Note: if 'ret' is
344862306a36Sopenharmony_ci	 * -ENODEV, there can't be any more work item scheduled to hbus->wq
344962306a36Sopenharmony_ci	 * after the flush_workqueue(): see vmbus_onoffer_rescind() ->
345062306a36Sopenharmony_ci	 * vmbus_reset_channel_cb(), vmbus_rescind_cleanup() ->
345162306a36Sopenharmony_ci	 * channel->rescind = true.
345262306a36Sopenharmony_ci	 */
345362306a36Sopenharmony_ci	flush_workqueue(hbus->wq);
345462306a36Sopenharmony_ci
345562306a36Sopenharmony_ci	return ret;
345662306a36Sopenharmony_ci}
345762306a36Sopenharmony_ci
345862306a36Sopenharmony_ci/**
345962306a36Sopenharmony_ci * hv_send_resources_allocated() - Report local resource choices
346062306a36Sopenharmony_ci * @hdev:	VMBus's tracking struct for this root PCI bus
346162306a36Sopenharmony_ci *
346262306a36Sopenharmony_ci * The host OS is expecting to be sent a request as a message
346362306a36Sopenharmony_ci * which contains all the resources that the device will use.
346462306a36Sopenharmony_ci * The response contains those same resources, "translated"
346562306a36Sopenharmony_ci * which is to say, the values which should be used by the
346662306a36Sopenharmony_ci * hardware, when it delivers an interrupt.  (MMIO resources are
346762306a36Sopenharmony_ci * used in local terms.)  This is nice for Windows, and lines up
346862306a36Sopenharmony_ci * with the FDO/PDO split, which doesn't exist in Linux.  Linux
346962306a36Sopenharmony_ci * is deeply expecting to scan an emulated PCI configuration
347062306a36Sopenharmony_ci * space.  So this message is sent here only to drive the state
347162306a36Sopenharmony_ci * machine on the host forward.
347262306a36Sopenharmony_ci *
347362306a36Sopenharmony_ci * Return: 0 on success, -errno on failure
347462306a36Sopenharmony_ci */
347562306a36Sopenharmony_cistatic int hv_send_resources_allocated(struct hv_device *hdev)
347662306a36Sopenharmony_ci{
347762306a36Sopenharmony_ci	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
347862306a36Sopenharmony_ci	struct pci_resources_assigned *res_assigned;
347962306a36Sopenharmony_ci	struct pci_resources_assigned2 *res_assigned2;
348062306a36Sopenharmony_ci	struct hv_pci_compl comp_pkt;
348162306a36Sopenharmony_ci	struct hv_pci_dev *hpdev;
348262306a36Sopenharmony_ci	struct pci_packet *pkt;
348362306a36Sopenharmony_ci	size_t size_res;
348462306a36Sopenharmony_ci	int wslot;
348562306a36Sopenharmony_ci	int ret;
348662306a36Sopenharmony_ci
348762306a36Sopenharmony_ci	size_res = (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_2)
348862306a36Sopenharmony_ci			? sizeof(*res_assigned) : sizeof(*res_assigned2);
348962306a36Sopenharmony_ci
349062306a36Sopenharmony_ci	pkt = kmalloc(sizeof(*pkt) + size_res, GFP_KERNEL);
349162306a36Sopenharmony_ci	if (!pkt)
349262306a36Sopenharmony_ci		return -ENOMEM;
349362306a36Sopenharmony_ci
349462306a36Sopenharmony_ci	ret = 0;
349562306a36Sopenharmony_ci
349662306a36Sopenharmony_ci	for (wslot = 0; wslot < 256; wslot++) {
349762306a36Sopenharmony_ci		hpdev = get_pcichild_wslot(hbus, wslot);
349862306a36Sopenharmony_ci		if (!hpdev)
349962306a36Sopenharmony_ci			continue;
350062306a36Sopenharmony_ci
350162306a36Sopenharmony_ci		memset(pkt, 0, sizeof(*pkt) + size_res);
350262306a36Sopenharmony_ci		init_completion(&comp_pkt.host_event);
350362306a36Sopenharmony_ci		pkt->completion_func = hv_pci_generic_compl;
350462306a36Sopenharmony_ci		pkt->compl_ctxt = &comp_pkt;
350562306a36Sopenharmony_ci
350662306a36Sopenharmony_ci		if (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_2) {
350762306a36Sopenharmony_ci			res_assigned =
350862306a36Sopenharmony_ci				(struct pci_resources_assigned *)&pkt->message;
350962306a36Sopenharmony_ci			res_assigned->message_type.type =
351062306a36Sopenharmony_ci				PCI_RESOURCES_ASSIGNED;
351162306a36Sopenharmony_ci			res_assigned->wslot.slot = hpdev->desc.win_slot.slot;
351262306a36Sopenharmony_ci		} else {
351362306a36Sopenharmony_ci			res_assigned2 =
351462306a36Sopenharmony_ci				(struct pci_resources_assigned2 *)&pkt->message;
351562306a36Sopenharmony_ci			res_assigned2->message_type.type =
351662306a36Sopenharmony_ci				PCI_RESOURCES_ASSIGNED2;
351762306a36Sopenharmony_ci			res_assigned2->wslot.slot = hpdev->desc.win_slot.slot;
351862306a36Sopenharmony_ci		}
351962306a36Sopenharmony_ci		put_pcichild(hpdev);
352062306a36Sopenharmony_ci
352162306a36Sopenharmony_ci		ret = vmbus_sendpacket(hdev->channel, &pkt->message,
352262306a36Sopenharmony_ci				size_res, (unsigned long)pkt,
352362306a36Sopenharmony_ci				VM_PKT_DATA_INBAND,
352462306a36Sopenharmony_ci				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
352562306a36Sopenharmony_ci		if (!ret)
352662306a36Sopenharmony_ci			ret = wait_for_response(hdev, &comp_pkt.host_event);
352762306a36Sopenharmony_ci		if (ret)
352862306a36Sopenharmony_ci			break;
352962306a36Sopenharmony_ci
353062306a36Sopenharmony_ci		if (comp_pkt.completion_status < 0) {
353162306a36Sopenharmony_ci			ret = -EPROTO;
353262306a36Sopenharmony_ci			dev_err(&hdev->device,
353362306a36Sopenharmony_ci				"resource allocated returned 0x%x",
353462306a36Sopenharmony_ci				comp_pkt.completion_status);
353562306a36Sopenharmony_ci			break;
353662306a36Sopenharmony_ci		}
353762306a36Sopenharmony_ci
353862306a36Sopenharmony_ci		hbus->wslot_res_allocated = wslot;
353962306a36Sopenharmony_ci	}
354062306a36Sopenharmony_ci
354162306a36Sopenharmony_ci	kfree(pkt);
354262306a36Sopenharmony_ci	return ret;
354362306a36Sopenharmony_ci}
354462306a36Sopenharmony_ci
354562306a36Sopenharmony_ci/**
354662306a36Sopenharmony_ci * hv_send_resources_released() - Report local resources
354762306a36Sopenharmony_ci * released
354862306a36Sopenharmony_ci * @hdev:	VMBus's tracking struct for this root PCI bus
354962306a36Sopenharmony_ci *
355062306a36Sopenharmony_ci * Return: 0 on success, -errno on failure
355162306a36Sopenharmony_ci */
355262306a36Sopenharmony_cistatic int hv_send_resources_released(struct hv_device *hdev)
355362306a36Sopenharmony_ci{
355462306a36Sopenharmony_ci	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
355562306a36Sopenharmony_ci	struct pci_child_message pkt;
355662306a36Sopenharmony_ci	struct hv_pci_dev *hpdev;
355762306a36Sopenharmony_ci	int wslot;
355862306a36Sopenharmony_ci	int ret;
355962306a36Sopenharmony_ci
356062306a36Sopenharmony_ci	for (wslot = hbus->wslot_res_allocated; wslot >= 0; wslot--) {
356162306a36Sopenharmony_ci		hpdev = get_pcichild_wslot(hbus, wslot);
356262306a36Sopenharmony_ci		if (!hpdev)
356362306a36Sopenharmony_ci			continue;
356462306a36Sopenharmony_ci
356562306a36Sopenharmony_ci		memset(&pkt, 0, sizeof(pkt));
356662306a36Sopenharmony_ci		pkt.message_type.type = PCI_RESOURCES_RELEASED;
356762306a36Sopenharmony_ci		pkt.wslot.slot = hpdev->desc.win_slot.slot;
356862306a36Sopenharmony_ci
356962306a36Sopenharmony_ci		put_pcichild(hpdev);
357062306a36Sopenharmony_ci
357162306a36Sopenharmony_ci		ret = vmbus_sendpacket(hdev->channel, &pkt, sizeof(pkt), 0,
357262306a36Sopenharmony_ci				       VM_PKT_DATA_INBAND, 0);
357362306a36Sopenharmony_ci		if (ret)
357462306a36Sopenharmony_ci			return ret;
357562306a36Sopenharmony_ci
357662306a36Sopenharmony_ci		hbus->wslot_res_allocated = wslot - 1;
357762306a36Sopenharmony_ci	}
357862306a36Sopenharmony_ci
357962306a36Sopenharmony_ci	hbus->wslot_res_allocated = -1;
358062306a36Sopenharmony_ci
358162306a36Sopenharmony_ci	return 0;
358262306a36Sopenharmony_ci}
358362306a36Sopenharmony_ci
358462306a36Sopenharmony_ci#define HVPCI_DOM_MAP_SIZE (64 * 1024)
358562306a36Sopenharmony_cistatic DECLARE_BITMAP(hvpci_dom_map, HVPCI_DOM_MAP_SIZE);
358662306a36Sopenharmony_ci
358762306a36Sopenharmony_ci/*
358862306a36Sopenharmony_ci * PCI domain number 0 is used by emulated devices on Gen1 VMs, so define 0
358962306a36Sopenharmony_ci * as invalid for passthrough PCI devices of this driver.
359062306a36Sopenharmony_ci */
359162306a36Sopenharmony_ci#define HVPCI_DOM_INVALID 0
359262306a36Sopenharmony_ci
359362306a36Sopenharmony_ci/**
359462306a36Sopenharmony_ci * hv_get_dom_num() - Get a valid PCI domain number
359562306a36Sopenharmony_ci * Check if the PCI domain number is in use, and return another number if
359662306a36Sopenharmony_ci * it is in use.
359762306a36Sopenharmony_ci *
359862306a36Sopenharmony_ci * @dom: Requested domain number
359962306a36Sopenharmony_ci *
360062306a36Sopenharmony_ci * return: domain number on success, HVPCI_DOM_INVALID on failure
360162306a36Sopenharmony_ci */
360262306a36Sopenharmony_cistatic u16 hv_get_dom_num(u16 dom)
360362306a36Sopenharmony_ci{
360462306a36Sopenharmony_ci	unsigned int i;
360562306a36Sopenharmony_ci
360662306a36Sopenharmony_ci	if (test_and_set_bit(dom, hvpci_dom_map) == 0)
360762306a36Sopenharmony_ci		return dom;
360862306a36Sopenharmony_ci
360962306a36Sopenharmony_ci	for_each_clear_bit(i, hvpci_dom_map, HVPCI_DOM_MAP_SIZE) {
361062306a36Sopenharmony_ci		if (test_and_set_bit(i, hvpci_dom_map) == 0)
361162306a36Sopenharmony_ci			return i;
361262306a36Sopenharmony_ci	}
361362306a36Sopenharmony_ci
361462306a36Sopenharmony_ci	return HVPCI_DOM_INVALID;
361562306a36Sopenharmony_ci}
361662306a36Sopenharmony_ci
361762306a36Sopenharmony_ci/**
361862306a36Sopenharmony_ci * hv_put_dom_num() - Mark the PCI domain number as free
361962306a36Sopenharmony_ci * @dom: Domain number to be freed
362062306a36Sopenharmony_ci */
362162306a36Sopenharmony_cistatic void hv_put_dom_num(u16 dom)
362262306a36Sopenharmony_ci{
362362306a36Sopenharmony_ci	clear_bit(dom, hvpci_dom_map);
362462306a36Sopenharmony_ci}
362562306a36Sopenharmony_ci
362662306a36Sopenharmony_ci/**
362762306a36Sopenharmony_ci * hv_pci_probe() - New VMBus channel probe, for a root PCI bus
362862306a36Sopenharmony_ci * @hdev:	VMBus's tracking struct for this root PCI bus
362962306a36Sopenharmony_ci * @dev_id:	Identifies the device itself
363062306a36Sopenharmony_ci *
363162306a36Sopenharmony_ci * Return: 0 on success, -errno on failure
363262306a36Sopenharmony_ci */
363362306a36Sopenharmony_cistatic int hv_pci_probe(struct hv_device *hdev,
363462306a36Sopenharmony_ci			const struct hv_vmbus_device_id *dev_id)
363562306a36Sopenharmony_ci{
363662306a36Sopenharmony_ci	struct pci_host_bridge *bridge;
363762306a36Sopenharmony_ci	struct hv_pcibus_device *hbus;
363862306a36Sopenharmony_ci	u16 dom_req, dom;
363962306a36Sopenharmony_ci	char *name;
364062306a36Sopenharmony_ci	int ret;
364162306a36Sopenharmony_ci
364262306a36Sopenharmony_ci	bridge = devm_pci_alloc_host_bridge(&hdev->device, 0);
364362306a36Sopenharmony_ci	if (!bridge)
364462306a36Sopenharmony_ci		return -ENOMEM;
364562306a36Sopenharmony_ci
364662306a36Sopenharmony_ci	hbus = kzalloc(sizeof(*hbus), GFP_KERNEL);
364762306a36Sopenharmony_ci	if (!hbus)
364862306a36Sopenharmony_ci		return -ENOMEM;
364962306a36Sopenharmony_ci
365062306a36Sopenharmony_ci	hbus->bridge = bridge;
365162306a36Sopenharmony_ci	mutex_init(&hbus->state_lock);
365262306a36Sopenharmony_ci	hbus->state = hv_pcibus_init;
365362306a36Sopenharmony_ci	hbus->wslot_res_allocated = -1;
365462306a36Sopenharmony_ci
365562306a36Sopenharmony_ci	/*
365662306a36Sopenharmony_ci	 * The PCI bus "domain" is what is called "segment" in ACPI and other
365762306a36Sopenharmony_ci	 * specs. Pull it from the instance ID, to get something usually
365862306a36Sopenharmony_ci	 * unique. In rare cases of collision, we will find out another number
365962306a36Sopenharmony_ci	 * not in use.
366062306a36Sopenharmony_ci	 *
366162306a36Sopenharmony_ci	 * Note that, since this code only runs in a Hyper-V VM, Hyper-V
366262306a36Sopenharmony_ci	 * together with this guest driver can guarantee that (1) The only
366362306a36Sopenharmony_ci	 * domain used by Gen1 VMs for something that looks like a physical
366462306a36Sopenharmony_ci	 * PCI bus (which is actually emulated by the hypervisor) is domain 0.
366562306a36Sopenharmony_ci	 * (2) There will be no overlap between domains (after fixing possible
366662306a36Sopenharmony_ci	 * collisions) in the same VM.
366762306a36Sopenharmony_ci	 */
366862306a36Sopenharmony_ci	dom_req = hdev->dev_instance.b[5] << 8 | hdev->dev_instance.b[4];
366962306a36Sopenharmony_ci	dom = hv_get_dom_num(dom_req);
367062306a36Sopenharmony_ci
367162306a36Sopenharmony_ci	if (dom == HVPCI_DOM_INVALID) {
367262306a36Sopenharmony_ci		dev_err(&hdev->device,
367362306a36Sopenharmony_ci			"Unable to use dom# 0x%x or other numbers", dom_req);
367462306a36Sopenharmony_ci		ret = -EINVAL;
367562306a36Sopenharmony_ci		goto free_bus;
367662306a36Sopenharmony_ci	}
367762306a36Sopenharmony_ci
367862306a36Sopenharmony_ci	if (dom != dom_req)
367962306a36Sopenharmony_ci		dev_info(&hdev->device,
368062306a36Sopenharmony_ci			 "PCI dom# 0x%x has collision, using 0x%x",
368162306a36Sopenharmony_ci			 dom_req, dom);
368262306a36Sopenharmony_ci
368362306a36Sopenharmony_ci	hbus->bridge->domain_nr = dom;
368462306a36Sopenharmony_ci#ifdef CONFIG_X86
368562306a36Sopenharmony_ci	hbus->sysdata.domain = dom;
368662306a36Sopenharmony_ci	hbus->use_calls = !!(ms_hyperv.hints & HV_X64_USE_MMIO_HYPERCALLS);
368762306a36Sopenharmony_ci#elif defined(CONFIG_ARM64)
368862306a36Sopenharmony_ci	/*
368962306a36Sopenharmony_ci	 * Set the PCI bus parent to be the corresponding VMbus
369062306a36Sopenharmony_ci	 * device. Then the VMbus device will be assigned as the
369162306a36Sopenharmony_ci	 * ACPI companion in pcibios_root_bridge_prepare() and
369262306a36Sopenharmony_ci	 * pci_dma_configure() will propagate device coherence
369362306a36Sopenharmony_ci	 * information to devices created on the bus.
369462306a36Sopenharmony_ci	 */
369562306a36Sopenharmony_ci	hbus->sysdata.parent = hdev->device.parent;
369662306a36Sopenharmony_ci	hbus->use_calls = false;
369762306a36Sopenharmony_ci#endif
369862306a36Sopenharmony_ci
369962306a36Sopenharmony_ci	hbus->hdev = hdev;
370062306a36Sopenharmony_ci	INIT_LIST_HEAD(&hbus->children);
370162306a36Sopenharmony_ci	INIT_LIST_HEAD(&hbus->dr_list);
370262306a36Sopenharmony_ci	spin_lock_init(&hbus->config_lock);
370362306a36Sopenharmony_ci	spin_lock_init(&hbus->device_list_lock);
370462306a36Sopenharmony_ci	hbus->wq = alloc_ordered_workqueue("hv_pci_%x", 0,
370562306a36Sopenharmony_ci					   hbus->bridge->domain_nr);
370662306a36Sopenharmony_ci	if (!hbus->wq) {
370762306a36Sopenharmony_ci		ret = -ENOMEM;
370862306a36Sopenharmony_ci		goto free_dom;
370962306a36Sopenharmony_ci	}
371062306a36Sopenharmony_ci
371162306a36Sopenharmony_ci	hdev->channel->next_request_id_callback = vmbus_next_request_id;
371262306a36Sopenharmony_ci	hdev->channel->request_addr_callback = vmbus_request_addr;
371362306a36Sopenharmony_ci	hdev->channel->rqstor_size = HV_PCI_RQSTOR_SIZE;
371462306a36Sopenharmony_ci
371562306a36Sopenharmony_ci	ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
371662306a36Sopenharmony_ci			 hv_pci_onchannelcallback, hbus);
371762306a36Sopenharmony_ci	if (ret)
371862306a36Sopenharmony_ci		goto destroy_wq;
371962306a36Sopenharmony_ci
372062306a36Sopenharmony_ci	hv_set_drvdata(hdev, hbus);
372162306a36Sopenharmony_ci
372262306a36Sopenharmony_ci	ret = hv_pci_protocol_negotiation(hdev, pci_protocol_versions,
372362306a36Sopenharmony_ci					  ARRAY_SIZE(pci_protocol_versions));
372462306a36Sopenharmony_ci	if (ret)
372562306a36Sopenharmony_ci		goto close;
372662306a36Sopenharmony_ci
372762306a36Sopenharmony_ci	ret = hv_allocate_config_window(hbus);
372862306a36Sopenharmony_ci	if (ret)
372962306a36Sopenharmony_ci		goto close;
373062306a36Sopenharmony_ci
373162306a36Sopenharmony_ci	hbus->cfg_addr = ioremap(hbus->mem_config->start,
373262306a36Sopenharmony_ci				 PCI_CONFIG_MMIO_LENGTH);
373362306a36Sopenharmony_ci	if (!hbus->cfg_addr) {
373462306a36Sopenharmony_ci		dev_err(&hdev->device,
373562306a36Sopenharmony_ci			"Unable to map a virtual address for config space\n");
373662306a36Sopenharmony_ci		ret = -ENOMEM;
373762306a36Sopenharmony_ci		goto free_config;
373862306a36Sopenharmony_ci	}
373962306a36Sopenharmony_ci
374062306a36Sopenharmony_ci	name = kasprintf(GFP_KERNEL, "%pUL", &hdev->dev_instance);
374162306a36Sopenharmony_ci	if (!name) {
374262306a36Sopenharmony_ci		ret = -ENOMEM;
374362306a36Sopenharmony_ci		goto unmap;
374462306a36Sopenharmony_ci	}
374562306a36Sopenharmony_ci
374662306a36Sopenharmony_ci	hbus->fwnode = irq_domain_alloc_named_fwnode(name);
374762306a36Sopenharmony_ci	kfree(name);
374862306a36Sopenharmony_ci	if (!hbus->fwnode) {
374962306a36Sopenharmony_ci		ret = -ENOMEM;
375062306a36Sopenharmony_ci		goto unmap;
375162306a36Sopenharmony_ci	}
375262306a36Sopenharmony_ci
375362306a36Sopenharmony_ci	ret = hv_pcie_init_irq_domain(hbus);
375462306a36Sopenharmony_ci	if (ret)
375562306a36Sopenharmony_ci		goto free_fwnode;
375662306a36Sopenharmony_ci
375762306a36Sopenharmony_ci	ret = hv_pci_query_relations(hdev);
375862306a36Sopenharmony_ci	if (ret)
375962306a36Sopenharmony_ci		goto free_irq_domain;
376062306a36Sopenharmony_ci
376162306a36Sopenharmony_ci	mutex_lock(&hbus->state_lock);
376262306a36Sopenharmony_ci
376362306a36Sopenharmony_ci	ret = hv_pci_enter_d0(hdev);
376462306a36Sopenharmony_ci	if (ret)
376562306a36Sopenharmony_ci		goto release_state_lock;
376662306a36Sopenharmony_ci
376762306a36Sopenharmony_ci	ret = hv_pci_allocate_bridge_windows(hbus);
376862306a36Sopenharmony_ci	if (ret)
376962306a36Sopenharmony_ci		goto exit_d0;
377062306a36Sopenharmony_ci
377162306a36Sopenharmony_ci	ret = hv_send_resources_allocated(hdev);
377262306a36Sopenharmony_ci	if (ret)
377362306a36Sopenharmony_ci		goto free_windows;
377462306a36Sopenharmony_ci
377562306a36Sopenharmony_ci	prepopulate_bars(hbus);
377662306a36Sopenharmony_ci
377762306a36Sopenharmony_ci	hbus->state = hv_pcibus_probed;
377862306a36Sopenharmony_ci
377962306a36Sopenharmony_ci	ret = create_root_hv_pci_bus(hbus);
378062306a36Sopenharmony_ci	if (ret)
378162306a36Sopenharmony_ci		goto free_windows;
378262306a36Sopenharmony_ci
378362306a36Sopenharmony_ci	mutex_unlock(&hbus->state_lock);
378462306a36Sopenharmony_ci	return 0;
378562306a36Sopenharmony_ci
378662306a36Sopenharmony_cifree_windows:
378762306a36Sopenharmony_ci	hv_pci_free_bridge_windows(hbus);
378862306a36Sopenharmony_ciexit_d0:
378962306a36Sopenharmony_ci	(void) hv_pci_bus_exit(hdev, true);
379062306a36Sopenharmony_cirelease_state_lock:
379162306a36Sopenharmony_ci	mutex_unlock(&hbus->state_lock);
379262306a36Sopenharmony_cifree_irq_domain:
379362306a36Sopenharmony_ci	irq_domain_remove(hbus->irq_domain);
379462306a36Sopenharmony_cifree_fwnode:
379562306a36Sopenharmony_ci	irq_domain_free_fwnode(hbus->fwnode);
379662306a36Sopenharmony_ciunmap:
379762306a36Sopenharmony_ci	iounmap(hbus->cfg_addr);
379862306a36Sopenharmony_cifree_config:
379962306a36Sopenharmony_ci	hv_free_config_window(hbus);
380062306a36Sopenharmony_ciclose:
380162306a36Sopenharmony_ci	vmbus_close(hdev->channel);
380262306a36Sopenharmony_cidestroy_wq:
380362306a36Sopenharmony_ci	destroy_workqueue(hbus->wq);
380462306a36Sopenharmony_cifree_dom:
380562306a36Sopenharmony_ci	hv_put_dom_num(hbus->bridge->domain_nr);
380662306a36Sopenharmony_cifree_bus:
380762306a36Sopenharmony_ci	kfree(hbus);
380862306a36Sopenharmony_ci	return ret;
380962306a36Sopenharmony_ci}
381062306a36Sopenharmony_ci
381162306a36Sopenharmony_cistatic int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs)
381262306a36Sopenharmony_ci{
381362306a36Sopenharmony_ci	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
381462306a36Sopenharmony_ci	struct vmbus_channel *chan = hdev->channel;
381562306a36Sopenharmony_ci	struct {
381662306a36Sopenharmony_ci		struct pci_packet teardown_packet;
381762306a36Sopenharmony_ci		u8 buffer[sizeof(struct pci_message)];
381862306a36Sopenharmony_ci	} pkt;
381962306a36Sopenharmony_ci	struct hv_pci_compl comp_pkt;
382062306a36Sopenharmony_ci	struct hv_pci_dev *hpdev, *tmp;
382162306a36Sopenharmony_ci	unsigned long flags;
382262306a36Sopenharmony_ci	u64 trans_id;
382362306a36Sopenharmony_ci	int ret;
382462306a36Sopenharmony_ci
382562306a36Sopenharmony_ci	/*
382662306a36Sopenharmony_ci	 * After the host sends the RESCIND_CHANNEL message, it doesn't
382762306a36Sopenharmony_ci	 * access the per-channel ringbuffer any longer.
382862306a36Sopenharmony_ci	 */
382962306a36Sopenharmony_ci	if (chan->rescind)
383062306a36Sopenharmony_ci		return 0;
383162306a36Sopenharmony_ci
383262306a36Sopenharmony_ci	if (!keep_devs) {
383362306a36Sopenharmony_ci		struct list_head removed;
383462306a36Sopenharmony_ci
383562306a36Sopenharmony_ci		/* Move all present children to the list on stack */
383662306a36Sopenharmony_ci		INIT_LIST_HEAD(&removed);
383762306a36Sopenharmony_ci		spin_lock_irqsave(&hbus->device_list_lock, flags);
383862306a36Sopenharmony_ci		list_for_each_entry_safe(hpdev, tmp, &hbus->children, list_entry)
383962306a36Sopenharmony_ci			list_move_tail(&hpdev->list_entry, &removed);
384062306a36Sopenharmony_ci		spin_unlock_irqrestore(&hbus->device_list_lock, flags);
384162306a36Sopenharmony_ci
384262306a36Sopenharmony_ci		/* Remove all children in the list */
384362306a36Sopenharmony_ci		list_for_each_entry_safe(hpdev, tmp, &removed, list_entry) {
384462306a36Sopenharmony_ci			list_del(&hpdev->list_entry);
384562306a36Sopenharmony_ci			if (hpdev->pci_slot)
384662306a36Sopenharmony_ci				pci_destroy_slot(hpdev->pci_slot);
384762306a36Sopenharmony_ci			/* For the two refs got in new_pcichild_device() */
384862306a36Sopenharmony_ci			put_pcichild(hpdev);
384962306a36Sopenharmony_ci			put_pcichild(hpdev);
385062306a36Sopenharmony_ci		}
385162306a36Sopenharmony_ci	}
385262306a36Sopenharmony_ci
385362306a36Sopenharmony_ci	ret = hv_send_resources_released(hdev);
385462306a36Sopenharmony_ci	if (ret) {
385562306a36Sopenharmony_ci		dev_err(&hdev->device,
385662306a36Sopenharmony_ci			"Couldn't send resources released packet(s)\n");
385762306a36Sopenharmony_ci		return ret;
385862306a36Sopenharmony_ci	}
385962306a36Sopenharmony_ci
386062306a36Sopenharmony_ci	memset(&pkt.teardown_packet, 0, sizeof(pkt.teardown_packet));
386162306a36Sopenharmony_ci	init_completion(&comp_pkt.host_event);
386262306a36Sopenharmony_ci	pkt.teardown_packet.completion_func = hv_pci_generic_compl;
386362306a36Sopenharmony_ci	pkt.teardown_packet.compl_ctxt = &comp_pkt;
386462306a36Sopenharmony_ci	pkt.teardown_packet.message[0].type = PCI_BUS_D0EXIT;
386562306a36Sopenharmony_ci
386662306a36Sopenharmony_ci	ret = vmbus_sendpacket_getid(chan, &pkt.teardown_packet.message,
386762306a36Sopenharmony_ci				     sizeof(struct pci_message),
386862306a36Sopenharmony_ci				     (unsigned long)&pkt.teardown_packet,
386962306a36Sopenharmony_ci				     &trans_id, VM_PKT_DATA_INBAND,
387062306a36Sopenharmony_ci				     VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
387162306a36Sopenharmony_ci	if (ret)
387262306a36Sopenharmony_ci		return ret;
387362306a36Sopenharmony_ci
387462306a36Sopenharmony_ci	if (wait_for_completion_timeout(&comp_pkt.host_event, 10 * HZ) == 0) {
387562306a36Sopenharmony_ci		/*
387662306a36Sopenharmony_ci		 * The completion packet on the stack becomes invalid after
387762306a36Sopenharmony_ci		 * 'return'; remove the ID from the VMbus requestor if the
387862306a36Sopenharmony_ci		 * identifier is still mapped to/associated with the packet.
387962306a36Sopenharmony_ci		 *
388062306a36Sopenharmony_ci		 * Cf. hv_pci_onchannelcallback().
388162306a36Sopenharmony_ci		 */
388262306a36Sopenharmony_ci		vmbus_request_addr_match(chan, trans_id,
388362306a36Sopenharmony_ci					 (unsigned long)&pkt.teardown_packet);
388462306a36Sopenharmony_ci		return -ETIMEDOUT;
388562306a36Sopenharmony_ci	}
388662306a36Sopenharmony_ci
388762306a36Sopenharmony_ci	return 0;
388862306a36Sopenharmony_ci}
388962306a36Sopenharmony_ci
389062306a36Sopenharmony_ci/**
389162306a36Sopenharmony_ci * hv_pci_remove() - Remove routine for this VMBus channel
389262306a36Sopenharmony_ci * @hdev:	VMBus's tracking struct for this root PCI bus
389362306a36Sopenharmony_ci */
389462306a36Sopenharmony_cistatic void hv_pci_remove(struct hv_device *hdev)
389562306a36Sopenharmony_ci{
389662306a36Sopenharmony_ci	struct hv_pcibus_device *hbus;
389762306a36Sopenharmony_ci
389862306a36Sopenharmony_ci	hbus = hv_get_drvdata(hdev);
389962306a36Sopenharmony_ci	if (hbus->state == hv_pcibus_installed) {
390062306a36Sopenharmony_ci		tasklet_disable(&hdev->channel->callback_event);
390162306a36Sopenharmony_ci		hbus->state = hv_pcibus_removing;
390262306a36Sopenharmony_ci		tasklet_enable(&hdev->channel->callback_event);
390362306a36Sopenharmony_ci		destroy_workqueue(hbus->wq);
390462306a36Sopenharmony_ci		hbus->wq = NULL;
390562306a36Sopenharmony_ci		/*
390662306a36Sopenharmony_ci		 * At this point, no work is running or can be scheduled
390762306a36Sopenharmony_ci		 * on hbus-wq. We can't race with hv_pci_devices_present()
390862306a36Sopenharmony_ci		 * or hv_pci_eject_device(), it's safe to proceed.
390962306a36Sopenharmony_ci		 */
391062306a36Sopenharmony_ci
391162306a36Sopenharmony_ci		/* Remove the bus from PCI's point of view. */
391262306a36Sopenharmony_ci		pci_lock_rescan_remove();
391362306a36Sopenharmony_ci		pci_stop_root_bus(hbus->bridge->bus);
391462306a36Sopenharmony_ci		hv_pci_remove_slots(hbus);
391562306a36Sopenharmony_ci		pci_remove_root_bus(hbus->bridge->bus);
391662306a36Sopenharmony_ci		pci_unlock_rescan_remove();
391762306a36Sopenharmony_ci	}
391862306a36Sopenharmony_ci
391962306a36Sopenharmony_ci	hv_pci_bus_exit(hdev, false);
392062306a36Sopenharmony_ci
392162306a36Sopenharmony_ci	vmbus_close(hdev->channel);
392262306a36Sopenharmony_ci
392362306a36Sopenharmony_ci	iounmap(hbus->cfg_addr);
392462306a36Sopenharmony_ci	hv_free_config_window(hbus);
392562306a36Sopenharmony_ci	hv_pci_free_bridge_windows(hbus);
392662306a36Sopenharmony_ci	irq_domain_remove(hbus->irq_domain);
392762306a36Sopenharmony_ci	irq_domain_free_fwnode(hbus->fwnode);
392862306a36Sopenharmony_ci
392962306a36Sopenharmony_ci	hv_put_dom_num(hbus->bridge->domain_nr);
393062306a36Sopenharmony_ci
393162306a36Sopenharmony_ci	kfree(hbus);
393262306a36Sopenharmony_ci}
393362306a36Sopenharmony_ci
393462306a36Sopenharmony_cistatic int hv_pci_suspend(struct hv_device *hdev)
393562306a36Sopenharmony_ci{
393662306a36Sopenharmony_ci	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
393762306a36Sopenharmony_ci	enum hv_pcibus_state old_state;
393862306a36Sopenharmony_ci	int ret;
393962306a36Sopenharmony_ci
394062306a36Sopenharmony_ci	/*
394162306a36Sopenharmony_ci	 * hv_pci_suspend() must make sure there are no pending work items
394262306a36Sopenharmony_ci	 * before calling vmbus_close(), since it runs in a process context
394362306a36Sopenharmony_ci	 * as a callback in dpm_suspend().  When it starts to run, the channel
394462306a36Sopenharmony_ci	 * callback hv_pci_onchannelcallback(), which runs in a tasklet
394562306a36Sopenharmony_ci	 * context, can be still running concurrently and scheduling new work
394662306a36Sopenharmony_ci	 * items onto hbus->wq in hv_pci_devices_present() and
394762306a36Sopenharmony_ci	 * hv_pci_eject_device(), and the work item handlers can access the
394862306a36Sopenharmony_ci	 * vmbus channel, which can be being closed by hv_pci_suspend(), e.g.
394962306a36Sopenharmony_ci	 * the work item handler pci_devices_present_work() ->
395062306a36Sopenharmony_ci	 * new_pcichild_device() writes to the vmbus channel.
395162306a36Sopenharmony_ci	 *
395262306a36Sopenharmony_ci	 * To eliminate the race, hv_pci_suspend() disables the channel
395362306a36Sopenharmony_ci	 * callback tasklet, sets hbus->state to hv_pcibus_removing, and
395462306a36Sopenharmony_ci	 * re-enables the tasklet. This way, when hv_pci_suspend() proceeds,
395562306a36Sopenharmony_ci	 * it knows that no new work item can be scheduled, and then it flushes
395662306a36Sopenharmony_ci	 * hbus->wq and safely closes the vmbus channel.
395762306a36Sopenharmony_ci	 */
395862306a36Sopenharmony_ci	tasklet_disable(&hdev->channel->callback_event);
395962306a36Sopenharmony_ci
396062306a36Sopenharmony_ci	/* Change the hbus state to prevent new work items. */
396162306a36Sopenharmony_ci	old_state = hbus->state;
396262306a36Sopenharmony_ci	if (hbus->state == hv_pcibus_installed)
396362306a36Sopenharmony_ci		hbus->state = hv_pcibus_removing;
396462306a36Sopenharmony_ci
396562306a36Sopenharmony_ci	tasklet_enable(&hdev->channel->callback_event);
396662306a36Sopenharmony_ci
396762306a36Sopenharmony_ci	if (old_state != hv_pcibus_installed)
396862306a36Sopenharmony_ci		return -EINVAL;
396962306a36Sopenharmony_ci
397062306a36Sopenharmony_ci	flush_workqueue(hbus->wq);
397162306a36Sopenharmony_ci
397262306a36Sopenharmony_ci	ret = hv_pci_bus_exit(hdev, true);
397362306a36Sopenharmony_ci	if (ret)
397462306a36Sopenharmony_ci		return ret;
397562306a36Sopenharmony_ci
397662306a36Sopenharmony_ci	vmbus_close(hdev->channel);
397762306a36Sopenharmony_ci
397862306a36Sopenharmony_ci	return 0;
397962306a36Sopenharmony_ci}
398062306a36Sopenharmony_ci
398162306a36Sopenharmony_cistatic int hv_pci_restore_msi_msg(struct pci_dev *pdev, void *arg)
398262306a36Sopenharmony_ci{
398362306a36Sopenharmony_ci	struct irq_data *irq_data;
398462306a36Sopenharmony_ci	struct msi_desc *entry;
398562306a36Sopenharmony_ci	int ret = 0;
398662306a36Sopenharmony_ci
398762306a36Sopenharmony_ci	if (!pdev->msi_enabled && !pdev->msix_enabled)
398862306a36Sopenharmony_ci		return 0;
398962306a36Sopenharmony_ci
399062306a36Sopenharmony_ci	msi_lock_descs(&pdev->dev);
399162306a36Sopenharmony_ci	msi_for_each_desc(entry, &pdev->dev, MSI_DESC_ASSOCIATED) {
399262306a36Sopenharmony_ci		irq_data = irq_get_irq_data(entry->irq);
399362306a36Sopenharmony_ci		if (WARN_ON_ONCE(!irq_data)) {
399462306a36Sopenharmony_ci			ret = -EINVAL;
399562306a36Sopenharmony_ci			break;
399662306a36Sopenharmony_ci		}
399762306a36Sopenharmony_ci
399862306a36Sopenharmony_ci		hv_compose_msi_msg(irq_data, &entry->msg);
399962306a36Sopenharmony_ci	}
400062306a36Sopenharmony_ci	msi_unlock_descs(&pdev->dev);
400162306a36Sopenharmony_ci
400262306a36Sopenharmony_ci	return ret;
400362306a36Sopenharmony_ci}
400462306a36Sopenharmony_ci
400562306a36Sopenharmony_ci/*
400662306a36Sopenharmony_ci * Upon resume, pci_restore_msi_state() -> ... ->  __pci_write_msi_msg()
400762306a36Sopenharmony_ci * directly writes the MSI/MSI-X registers via MMIO, but since Hyper-V
400862306a36Sopenharmony_ci * doesn't trap and emulate the MMIO accesses, here hv_compose_msi_msg()
400962306a36Sopenharmony_ci * must be used to ask Hyper-V to re-create the IOMMU Interrupt Remapping
401062306a36Sopenharmony_ci * Table entries.
401162306a36Sopenharmony_ci */
401262306a36Sopenharmony_cistatic void hv_pci_restore_msi_state(struct hv_pcibus_device *hbus)
401362306a36Sopenharmony_ci{
401462306a36Sopenharmony_ci	pci_walk_bus(hbus->bridge->bus, hv_pci_restore_msi_msg, NULL);
401562306a36Sopenharmony_ci}
401662306a36Sopenharmony_ci
401762306a36Sopenharmony_cistatic int hv_pci_resume(struct hv_device *hdev)
401862306a36Sopenharmony_ci{
401962306a36Sopenharmony_ci	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
402062306a36Sopenharmony_ci	enum pci_protocol_version_t version[1];
402162306a36Sopenharmony_ci	int ret;
402262306a36Sopenharmony_ci
402362306a36Sopenharmony_ci	hbus->state = hv_pcibus_init;
402462306a36Sopenharmony_ci
402562306a36Sopenharmony_ci	hdev->channel->next_request_id_callback = vmbus_next_request_id;
402662306a36Sopenharmony_ci	hdev->channel->request_addr_callback = vmbus_request_addr;
402762306a36Sopenharmony_ci	hdev->channel->rqstor_size = HV_PCI_RQSTOR_SIZE;
402862306a36Sopenharmony_ci
402962306a36Sopenharmony_ci	ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
403062306a36Sopenharmony_ci			 hv_pci_onchannelcallback, hbus);
403162306a36Sopenharmony_ci	if (ret)
403262306a36Sopenharmony_ci		return ret;
403362306a36Sopenharmony_ci
403462306a36Sopenharmony_ci	/* Only use the version that was in use before hibernation. */
403562306a36Sopenharmony_ci	version[0] = hbus->protocol_version;
403662306a36Sopenharmony_ci	ret = hv_pci_protocol_negotiation(hdev, version, 1);
403762306a36Sopenharmony_ci	if (ret)
403862306a36Sopenharmony_ci		goto out;
403962306a36Sopenharmony_ci
404062306a36Sopenharmony_ci	ret = hv_pci_query_relations(hdev);
404162306a36Sopenharmony_ci	if (ret)
404262306a36Sopenharmony_ci		goto out;
404362306a36Sopenharmony_ci
404462306a36Sopenharmony_ci	mutex_lock(&hbus->state_lock);
404562306a36Sopenharmony_ci
404662306a36Sopenharmony_ci	ret = hv_pci_enter_d0(hdev);
404762306a36Sopenharmony_ci	if (ret)
404862306a36Sopenharmony_ci		goto release_state_lock;
404962306a36Sopenharmony_ci
405062306a36Sopenharmony_ci	ret = hv_send_resources_allocated(hdev);
405162306a36Sopenharmony_ci	if (ret)
405262306a36Sopenharmony_ci		goto release_state_lock;
405362306a36Sopenharmony_ci
405462306a36Sopenharmony_ci	prepopulate_bars(hbus);
405562306a36Sopenharmony_ci
405662306a36Sopenharmony_ci	hv_pci_restore_msi_state(hbus);
405762306a36Sopenharmony_ci
405862306a36Sopenharmony_ci	hbus->state = hv_pcibus_installed;
405962306a36Sopenharmony_ci	mutex_unlock(&hbus->state_lock);
406062306a36Sopenharmony_ci	return 0;
406162306a36Sopenharmony_ci
406262306a36Sopenharmony_cirelease_state_lock:
406362306a36Sopenharmony_ci	mutex_unlock(&hbus->state_lock);
406462306a36Sopenharmony_ciout:
406562306a36Sopenharmony_ci	vmbus_close(hdev->channel);
406662306a36Sopenharmony_ci	return ret;
406762306a36Sopenharmony_ci}
406862306a36Sopenharmony_ci
406962306a36Sopenharmony_cistatic const struct hv_vmbus_device_id hv_pci_id_table[] = {
407062306a36Sopenharmony_ci	/* PCI Pass-through Class ID */
407162306a36Sopenharmony_ci	/* 44C4F61D-4444-4400-9D52-802E27EDE19F */
407262306a36Sopenharmony_ci	{ HV_PCIE_GUID, },
407362306a36Sopenharmony_ci	{ },
407462306a36Sopenharmony_ci};
407562306a36Sopenharmony_ci
407662306a36Sopenharmony_ciMODULE_DEVICE_TABLE(vmbus, hv_pci_id_table);
407762306a36Sopenharmony_ci
407862306a36Sopenharmony_cistatic struct hv_driver hv_pci_drv = {
407962306a36Sopenharmony_ci	.name		= "hv_pci",
408062306a36Sopenharmony_ci	.id_table	= hv_pci_id_table,
408162306a36Sopenharmony_ci	.probe		= hv_pci_probe,
408262306a36Sopenharmony_ci	.remove		= hv_pci_remove,
408362306a36Sopenharmony_ci	.suspend	= hv_pci_suspend,
408462306a36Sopenharmony_ci	.resume		= hv_pci_resume,
408562306a36Sopenharmony_ci};
408662306a36Sopenharmony_ci
408762306a36Sopenharmony_cistatic void __exit exit_hv_pci_drv(void)
408862306a36Sopenharmony_ci{
408962306a36Sopenharmony_ci	vmbus_driver_unregister(&hv_pci_drv);
409062306a36Sopenharmony_ci
409162306a36Sopenharmony_ci	hvpci_block_ops.read_block = NULL;
409262306a36Sopenharmony_ci	hvpci_block_ops.write_block = NULL;
409362306a36Sopenharmony_ci	hvpci_block_ops.reg_blk_invalidate = NULL;
409462306a36Sopenharmony_ci}
409562306a36Sopenharmony_ci
409662306a36Sopenharmony_cistatic int __init init_hv_pci_drv(void)
409762306a36Sopenharmony_ci{
409862306a36Sopenharmony_ci	int ret;
409962306a36Sopenharmony_ci
410062306a36Sopenharmony_ci	if (!hv_is_hyperv_initialized())
410162306a36Sopenharmony_ci		return -ENODEV;
410262306a36Sopenharmony_ci
410362306a36Sopenharmony_ci	ret = hv_pci_irqchip_init();
410462306a36Sopenharmony_ci	if (ret)
410562306a36Sopenharmony_ci		return ret;
410662306a36Sopenharmony_ci
410762306a36Sopenharmony_ci	/* Set the invalid domain number's bit, so it will not be used */
410862306a36Sopenharmony_ci	set_bit(HVPCI_DOM_INVALID, hvpci_dom_map);
410962306a36Sopenharmony_ci
411062306a36Sopenharmony_ci	/* Initialize PCI block r/w interface */
411162306a36Sopenharmony_ci	hvpci_block_ops.read_block = hv_read_config_block;
411262306a36Sopenharmony_ci	hvpci_block_ops.write_block = hv_write_config_block;
411362306a36Sopenharmony_ci	hvpci_block_ops.reg_blk_invalidate = hv_register_block_invalidate;
411462306a36Sopenharmony_ci
411562306a36Sopenharmony_ci	return vmbus_driver_register(&hv_pci_drv);
411662306a36Sopenharmony_ci}
411762306a36Sopenharmony_ci
411862306a36Sopenharmony_cimodule_init(init_hv_pci_drv);
411962306a36Sopenharmony_cimodule_exit(exit_hv_pci_drv);
412062306a36Sopenharmony_ci
412162306a36Sopenharmony_ciMODULE_DESCRIPTION("Hyper-V PCI");
412262306a36Sopenharmony_ciMODULE_LICENSE("GPL v2");
4123