1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (c) 2009, Microsoft Corporation.
4 *
5 * Authors:
6 *   Haiyang Zhang <haiyangz@microsoft.com>
7 *   Hank Janssen  <hjanssen@microsoft.com>
8 */
9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10
11#include <linux/kernel.h>
12#include <linux/interrupt.h>
13#include <linux/sched.h>
14#include <linux/wait.h>
15#include <linux/mm.h>
16#include <linux/slab.h>
17#include <linux/list.h>
18#include <linux/module.h>
19#include <linux/completion.h>
20#include <linux/delay.h>
21#include <linux/cpu.h>
22#include <linux/hyperv.h>
23#include <asm/mshyperv.h>
24
25#include "hyperv_vmbus.h"
26
27static void init_vp_index(struct vmbus_channel *channel);
28
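/*
 * Table of the device types VMBus knows about.  An offer whose interface
 * type GUID matches an entry is assigned that dev_type by hv_get_dev_type().
 * Entries with .perf_device set are the performance critical channels whose
 * interrupts init_vp_index() distributes across CPUs; all other channels
 * are bound to VMBUS_CONNECT_CPU.
 */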
29const struct vmbus_device vmbus_devs[] = {
30	/* IDE */
31	{ .dev_type = HV_IDE,
32	  HV_IDE_GUID,
33	  .perf_device = true,
34	},
35
36	/* SCSI */
37	{ .dev_type = HV_SCSI,
38	  HV_SCSI_GUID,
39	  .perf_device = true,
40	},
41
42	/* Fibre Channel */
43	{ .dev_type = HV_FC,
44	  HV_SYNTHFC_GUID,
45	  .perf_device = true,
46	},
47
48	/* Synthetic NIC */
49	{ .dev_type = HV_NIC,
50	  HV_NIC_GUID,
51	  .perf_device = true,
52	},
53
54	/* Network Direct */
55	{ .dev_type = HV_ND,
56	  HV_ND_GUID,
57	  .perf_device = true,
58	},
59
60	/* PCIE */
61	{ .dev_type = HV_PCIE,
62	  HV_PCIE_GUID,
63	  .perf_device = false,
64	},
65
66	/* Synthetic Frame Buffer */
67	{ .dev_type = HV_FB,
68	  HV_SYNTHVID_GUID,
69	  .perf_device = false,
70	},
71
72	/* Synthetic Keyboard */
73	{ .dev_type = HV_KBD,
74	  HV_KBD_GUID,
75	  .perf_device = false,
76	},
77
78	/* Synthetic MOUSE */
79	{ .dev_type = HV_MOUSE,
80	  HV_MOUSE_GUID,
81	  .perf_device = false,
82	},
83
84	/* KVP */
85	{ .dev_type = HV_KVP,
86	  HV_KVP_GUID,
87	  .perf_device = false,
88	},
89
90	/* Time Synch */
91	{ .dev_type = HV_TS,
92	  HV_TS_GUID,
93	  .perf_device = false,
94	},
95
96	/* Heartbeat */
97	{ .dev_type = HV_HB,
98	  HV_HEART_BEAT_GUID,
99	  .perf_device = false,
100	},
101
102	/* Shutdown */
103	{ .dev_type = HV_SHUTDOWN,
104	  HV_SHUTDOWN_GUID,
105	  .perf_device = false,
106	},
107
108	/* File copy */
109	{ .dev_type = HV_FCOPY,
110	  HV_FCOPY_GUID,
111	  .perf_device = false,
112	},
113
114	/* Backup */
115	{ .dev_type = HV_BACKUP,
116	  HV_VSS_GUID,
117	  .perf_device = false,
118	},
119
120	/* Dynamic Memory */
121	{ .dev_type = HV_DM,
122	  HV_DM_GUID,
123	  .perf_device = false,
124	},
125
126	/* Unknown GUID */
127	{ .dev_type = HV_UNKNOWN,
128	  .perf_device = false,
129	},
130};
131
132static const struct {
133	guid_t guid;
134} vmbus_unsupported_devs[] = {
135	{ HV_AVMA1_GUID },
136	{ HV_AVMA2_GUID },
137	{ HV_RDV_GUID	},
138};
139
140/*
141 * The rescinded channel may be blocked waiting for a response from the host;
142 * take care of that.
143 */
144static void vmbus_rescind_cleanup(struct vmbus_channel *channel)
145{
146	struct vmbus_channel_msginfo *msginfo;
147	unsigned long flags;
148
150	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
151	channel->rescind = true;
152	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
153				msglistentry) {
154
155		if (msginfo->waiting_channel == channel) {
156			complete(&msginfo->waitevent);
157			break;
158		}
159	}
160	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
161}
162
163static bool is_unsupported_vmbus_devs(const guid_t *guid)
164{
165	int i;
166
167	for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
168		if (guid_equal(guid, &vmbus_unsupported_devs[i].guid))
169			return true;
170	return false;
171}
172
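/*
 * Map a channel's interface type GUID to an index (HV_IDE .. HV_UNKNOWN)
 * into vmbus_devs[].  hv_sock offers and the explicitly unsupported GUIDs
 * above are reported as HV_UNKNOWN.
 */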
173static u16 hv_get_dev_type(const struct vmbus_channel *channel)
174{
175	const guid_t *guid = &channel->offermsg.offer.if_type;
176	u16 i;
177
178	if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
179		return HV_UNKNOWN;
180
181	for (i = HV_IDE; i < HV_UNKNOWN; i++) {
182		if (guid_equal(guid, &vmbus_devs[i].guid))
183			return i;
184	}
185	pr_info("Unknown GUID: %pUl\n", guid);
186	return i;
187}
188
189/**
190 * vmbus_prep_negotiate_resp() - Create default response for Negotiate message
191 * @icmsghdrp: Pointer to msg header structure
192 * @buf: Raw buffer channel data
193 * @fw_version: The framework versions we can support.
194 * @fw_vercnt: The size of @fw_version.
195 * @srv_version: The service versions we can support.
196 * @srv_vercnt: The size of @srv_version.
197 * @nego_fw_version: The selected framework version.
198 * @nego_srv_version: The selected service version.
199 *
200 * Note: Versions are given in decreasing order.
201 *
202 * Set up and fill in default negotiate response message.
203 * Mainly used by Hyper-V drivers.
204 */
205bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
206				u8 *buf, const int *fw_version, int fw_vercnt,
207				const int *srv_version, int srv_vercnt,
208				int *nego_fw_version, int *nego_srv_version)
209{
210	int icframe_major, icframe_minor;
211	int icmsg_major, icmsg_minor;
212	int fw_major, fw_minor;
213	int srv_major, srv_minor;
214	int i, j;
215	bool found_match = false;
216	struct icmsg_negotiate *negop;
217
218	icmsghdrp->icmsgsize = 0x10;
219	negop = (struct icmsg_negotiate *)&buf[
220		sizeof(struct vmbuspipe_hdr) +
221		sizeof(struct icmsg_hdr)];
222
223	icframe_major = negop->icframe_vercnt;
224	icframe_minor = 0;
225
226	icmsg_major = negop->icmsg_vercnt;
227	icmsg_minor = 0;
228
229	/*
230	 * Select the framework version number we will
231	 * support.
232	 */
233
234	for (i = 0; i < fw_vercnt; i++) {
235		fw_major = (fw_version[i] >> 16);
236		fw_minor = (fw_version[i] & 0xFFFF);
237
238		for (j = 0; j < negop->icframe_vercnt; j++) {
239			if ((negop->icversion_data[j].major == fw_major) &&
240			    (negop->icversion_data[j].minor == fw_minor)) {
241				icframe_major = negop->icversion_data[j].major;
242				icframe_minor = negop->icversion_data[j].minor;
243				found_match = true;
244				break;
245			}
246		}
247
248		if (found_match)
249			break;
250	}
251
252	if (!found_match)
253		goto fw_error;
254
255	found_match = false;
256
257	for (i = 0; i < srv_vercnt; i++) {
258		srv_major = (srv_version[i] >> 16);
259		srv_minor = (srv_version[i] & 0xFFFF);
260
261		for (j = negop->icframe_vercnt;
262			(j < negop->icframe_vercnt + negop->icmsg_vercnt);
263			j++) {
264
265			if ((negop->icversion_data[j].major == srv_major) &&
266				(negop->icversion_data[j].minor == srv_minor)) {
267
268				icmsg_major = negop->icversion_data[j].major;
269				icmsg_minor = negop->icversion_data[j].minor;
270				found_match = true;
271				break;
272			}
273		}
274
275		if (found_match)
276			break;
277	}
278
279	/*
280	 * Respond with the framework and service
281	 * version numbers we can support.
282	 */
283
284fw_error:
285	if (!found_match) {
286		negop->icframe_vercnt = 0;
287		negop->icmsg_vercnt = 0;
288	} else {
289		negop->icframe_vercnt = 1;
290		negop->icmsg_vercnt = 1;
291	}
292
293	if (nego_fw_version)
294		*nego_fw_version = (icframe_major << 16) | icframe_minor;
295
296	if (nego_srv_version)
297		*nego_srv_version = (icmsg_major << 16) | icmsg_minor;
298
299	negop->icversion_data[0].major = icframe_major;
300	negop->icversion_data[0].minor = icframe_minor;
301	negop->icversion_data[1].major = icmsg_major;
302	negop->icversion_data[1].minor = icmsg_minor;
303	return found_match;
304}
306EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
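
/*
 * Illustrative sketch (not taken verbatim from an in-tree driver): an IC
 * "util" driver typically calls the helper above from its channel callback
 * once the negotiate packet has arrived.  Versions are encoded as
 * (major << 16) | minor, so e.g. 3.0 is 0x30000.  The identifiers
 * fw_versions, FW_VER_COUNT, srv_versions, SRV_VER_COUNT and srv_version
 * below are placeholders, not symbols defined in this file:
 *
 *	if (vmbus_prep_negotiate_resp(icmsghdrp, recv_buffer,
 *				      fw_versions, FW_VER_COUNT,
 *				      srv_versions, SRV_VER_COUNT,
 *				      NULL, &srv_version))
 *		pr_info("negotiated service version %d.%d\n",
 *			srv_version >> 16, srv_version & 0xFFFF);
 */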
307
308/*
309 * alloc_channel - Allocate and initialize a vmbus channel object
310 */
311static struct vmbus_channel *alloc_channel(void)
312{
313	struct vmbus_channel *channel;
314
315	channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
316	if (!channel)
317		return NULL;
318
319	spin_lock_init(&channel->sched_lock);
320	init_completion(&channel->rescind_event);
321
322	INIT_LIST_HEAD(&channel->sc_list);
323
324	tasklet_init(&channel->callback_event,
325		     vmbus_on_event, (unsigned long)channel);
326
327	hv_ringbuffer_pre_init(channel);
328
329	return channel;
330}
331
332/*
333 * free_channel - Release the resources used by the vmbus channel object
334 */
335static void free_channel(struct vmbus_channel *channel)
336{
337	tasklet_kill(&channel->callback_event);
338	vmbus_remove_channel_attr_group(channel);
339
340	kobject_put(&channel->kobj);
341}
342
343void vmbus_channel_map_relid(struct vmbus_channel *channel)
344{
345	if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
346		return;
347	/*
348	 * The mapping of the channel's relid is visible to the CPUs that
349	 * execute vmbus_chan_sched() by the time that vmbus_chan_sched()
350	 * runs:
351	 *
352	 *  (a) In the "normal (i.e., not resuming from hibernation)" path,
353	 *      the full barrier in virt_store_mb() guarantees that the store
354	 *      is propagated to all CPUs before the add_channel_work work
355	 *      is queued.  In turn, add_channel_work is queued before the
356	 *      channel's ring buffer is allocated/initialized and the
357	 *      OPENCHANNEL message for the channel is sent in vmbus_open().
358	 *      Hyper-V won't start sending the interrupts for the channel
359	 *      before the OPENCHANNEL message is acked.  The memory barrier
360	 *      in vmbus_chan_sched() -> sync_test_and_clear_bit() ensures
361	 *      that vmbus_chan_sched() must find the channel's relid in
362	 *      recv_int_page before retrieving the channel pointer from the
363	 *      array of channels.
364	 *
365	 *  (b) In the "resuming from hibernation" path, the virt_store_mb()
366	 *      guarantees that the store is propagated to all CPUs before
367	 *      the VMBus connection is marked as ready for the resume event
368	 *      (cf. check_ready_for_resume_event()).  The interrupt handler
369	 *      of the VMBus driver and vmbus_chan_sched() can not run before
370	 *      vmbus_bus_resume() has completed execution (cf. resume_noirq).
371	 */
372	virt_store_mb(
373		vmbus_connection.channels[channel->offermsg.child_relid],
374		channel);
375}
376
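/*
 * Undo vmbus_channel_map_relid(): drop the channel from the relid -> channel
 * array so that vmbus_chan_sched() can no longer find it.
 */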
377void vmbus_channel_unmap_relid(struct vmbus_channel *channel)
378{
379	if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
380		return;
381	WRITE_ONCE(
382		vmbus_connection.channels[channel->offermsg.child_relid],
383		NULL);
384}
385
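/*
 * Tell the host that the guest is done with the given relid, so the host is
 * free to reuse it for a future offer.
 */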
386static void vmbus_release_relid(u32 relid)
387{
388	struct vmbus_channel_relid_released msg;
389	int ret;
390
391	memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
392	msg.child_relid = relid;
393	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
394	ret = vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released),
395			     true);
396
397	trace_vmbus_release_relid(&msg, ret);
398}
399
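/*
 * Final teardown of a rescinded channel: unmap its relid, unlink it from the
 * global channel list (or from its primary's sc_list), release the relid to
 * the host and free the channel object.  Must be called with
 * vmbus_connection.channel_mutex held.
 */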
400void hv_process_channel_removal(struct vmbus_channel *channel)
401{
402	lockdep_assert_held(&vmbus_connection.channel_mutex);
403	BUG_ON(!channel->rescind);
404
405	/*
406	 * hv_process_channel_removal() could find INVALID_RELID only for
407	 * hv_sock channels.  See the inline comments in vmbus_onoffer().
408	 */
409	WARN_ON(channel->offermsg.child_relid == INVALID_RELID &&
410		!is_hvsock_channel(channel));
411
412	/*
413	 * Upon suspend, an in-use hv_sock channel is removed from the array of
414	 * channels and the relid is invalidated.  After hibernation, when the
415	 * user-space application destroys the channel, it's unnecessary and
416	 * unsafe to remove the channel from the array of channels.  See also
417	 * the inline comments before the call of vmbus_release_relid() below.
418	 */
419	if (channel->offermsg.child_relid != INVALID_RELID)
420		vmbus_channel_unmap_relid(channel);
421
422	if (channel->primary_channel == NULL)
423		list_del(&channel->listentry);
424	else
425		list_del(&channel->sc_list);
426
427	/*
428	 * If this is a "perf" channel, update the hv_numa_map[] masks so that
429	 * init_vp_index() can (re-)use the CPU.
430	 */
431	if (hv_is_perf_channel(channel))
432		hv_clear_alloced_cpu(channel->target_cpu);
433
434	/*
435	 * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
436	 * the relid is invalidated; after hibernation, when the user-space app
437	 * destroys the channel, the relid is INVALID_RELID, and in this case
438	 * it's unnecessary and unsafe to release the old relid, since the same
439	 * relid can refer to a completely different channel now.
440	 */
441	if (channel->offermsg.child_relid != INVALID_RELID)
442		vmbus_release_relid(channel->offermsg.child_relid);
443
444	free_channel(channel);
445}
446
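/*
 * Unregister the device for every channel on the global list.  Marking each
 * channel as rescinded first lets hv_process_channel_removal() run from the
 * unregister path.
 */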
447void vmbus_free_channels(void)
448{
449	struct vmbus_channel *channel, *tmp;
450
451	list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
452		listentry) {
453		/* hv_process_channel_removal() needs this */
454		channel->rescind = true;
455
456		vmbus_device_unregister(channel->device_obj);
457	}
458}
459
460/* Note: the function can run concurrently for primary/sub channels. */
461static void vmbus_add_channel_work(struct work_struct *work)
462{
463	struct vmbus_channel *newchannel =
464		container_of(work, struct vmbus_channel, add_channel_work);
465	struct vmbus_channel *primary_channel = newchannel->primary_channel;
466	int ret;
467
468	/*
469	 * This state is used to indicate a successful open
470	 * so that when we do close the channel normally, we
471	 * can cleanup properly.
472	 */
473	newchannel->state = CHANNEL_OPEN_STATE;
474
475	if (primary_channel != NULL) {
476		/* newchannel is a sub-channel. */
477		struct hv_device *dev = primary_channel->device_obj;
478
479		if (vmbus_add_channel_kobj(dev, newchannel))
480			goto err_deq_chan;
481
482		if (primary_channel->sc_creation_callback != NULL)
483			primary_channel->sc_creation_callback(newchannel);
484
485		newchannel->probe_done = true;
486		return;
487	}
488
489	/*
490	 * Start the process of binding the primary channel to the driver
491	 */
492	newchannel->device_obj = vmbus_device_create(
493		&newchannel->offermsg.offer.if_type,
494		&newchannel->offermsg.offer.if_instance,
495		newchannel);
496	if (!newchannel->device_obj)
497		goto err_deq_chan;
498
499	newchannel->device_obj->device_id = newchannel->device_id;
500	/*
501	 * Add the new device to the bus. This will kick off device-driver
502	 * binding which eventually invokes the device driver's AddDevice()
503	 * method.
504	 *
505	 * If vmbus_device_register() fails, the 'device_obj' is freed in
506	 * vmbus_device_release() as called by device_unregister() in the
507	 * error path of vmbus_device_register(). In the outside error
508	 * path, there's no need to free it.
509	 */
510	ret = vmbus_device_register(newchannel->device_obj);
511
512	if (ret != 0) {
513		pr_err("unable to add child device object (relid %d)\n",
514			newchannel->offermsg.child_relid);
515		goto err_deq_chan;
516	}
517
518	newchannel->probe_done = true;
519	return;
520
521err_deq_chan:
522	mutex_lock(&vmbus_connection.channel_mutex);
523
524	/*
525	 * We need to set the flag, otherwise
526	 * vmbus_onoffer_rescind() can be blocked.
527	 */
528	newchannel->probe_done = true;
529
530	if (primary_channel == NULL)
531		list_del(&newchannel->listentry);
532	else
533		list_del(&newchannel->sc_list);
534
535	/* vmbus_process_offer() has mapped the channel. */
536	vmbus_channel_unmap_relid(newchannel);
537
538	mutex_unlock(&vmbus_connection.channel_mutex);
539
540	vmbus_release_relid(newchannel->offermsg.child_relid);
541
542	free_channel(newchannel);
543}
544
545/*
546 * vmbus_process_offer - Process the offer by creating a channel/device
547 * associated with this offer
548 */
549static void vmbus_process_offer(struct vmbus_channel *newchannel)
550{
551	struct vmbus_channel *channel;
552	struct workqueue_struct *wq;
553	bool fnew = true;
554
555	/*
556	 * Synchronize vmbus_process_offer() and CPU hotplugging:
557	 *
558	 * CPU1				CPU2
559	 *
560	 * [vmbus_process_offer()]	[Hot removal of the CPU]
561	 *
562	 * CPU_READ_LOCK		CPUS_WRITE_LOCK
563	 * LOAD cpu_online_mask		SEARCH chn_list
564	 * STORE target_cpu		LOAD target_cpu
565	 * INSERT chn_list		STORE cpu_online_mask
566	 * CPUS_READ_UNLOCK		CPUS_WRITE_UNLOCK
567	 *
568	 * Forbids: CPU1's LOAD from *not* seeing CPU2's STORE &&
569	 * 		CPU2's SEARCH from *not* seeing CPU1's INSERT
570	 *
571	 * Forbids: CPU2's SEARCH from seeing CPU1's INSERT &&
572	 * 		CPU2's LOAD from *not* seeing CPU1's STORE
573	 */
574	cpus_read_lock();
575
576	/*
577	 * Serializes the modifications of the chn_list list as well as
578	 * the accesses to next_numa_node_id in init_vp_index().
579	 */
580	mutex_lock(&vmbus_connection.channel_mutex);
581
582	init_vp_index(newchannel);
583
584	/* Remember the channels that should be cleaned up upon suspend. */
585	if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
586		atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);
587
588	/*
589	 * Now that we have acquired the channel_mutex,
590	 * we can release the potentially racing rescind thread.
591	 */
592	atomic_dec(&vmbus_connection.offer_in_progress);
593
594	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
595		if (guid_equal(&channel->offermsg.offer.if_type,
596			       &newchannel->offermsg.offer.if_type) &&
597		    guid_equal(&channel->offermsg.offer.if_instance,
598			       &newchannel->offermsg.offer.if_instance)) {
599			fnew = false;
600			break;
601		}
602	}
603
604	if (fnew) {
605		list_add_tail(&newchannel->listentry,
606			      &vmbus_connection.chn_list);
607	} else {
608		/*
609		 * Check to see if this is a valid sub-channel.
610		 */
611		if (newchannel->offermsg.offer.sub_channel_index == 0) {
612			mutex_unlock(&vmbus_connection.channel_mutex);
613			cpus_read_unlock();
614			/*
615			 * Don't call free_channel(), because newchannel->kobj
616			 * is not initialized yet.
617			 */
618			kfree(newchannel);
619			WARN_ON_ONCE(1);
620			return;
621		}
622		/*
623		 * Process the sub-channel.
624		 */
625		newchannel->primary_channel = channel;
626		list_add_tail(&newchannel->sc_list, &channel->sc_list);
627	}
628
629	vmbus_channel_map_relid(newchannel);
630
631	mutex_unlock(&vmbus_connection.channel_mutex);
632	cpus_read_unlock();
633
634	/*
635	 * vmbus_process_offer() mustn't call channel->sc_creation_callback()
636	 * directly for sub-channels, because sc_creation_callback() ->
637	 * vmbus_open() may never get the host's response to the
638	 * OPEN_CHANNEL message (the host may rescind a channel at any time,
639	 * e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind()
640	 * may not wake up the vmbus_open() as it's blocked due to a non-zero
641	 * vmbus_connection.offer_in_progress, and finally we have a deadlock.
642	 *
643	 * The above is also true for primary channels, if the related device
644	 * drivers use sync probing mode by default.
645	 *
646	 * And, usually the handling of primary channels and sub-channels can
647	 * depend on each other, so we should offload them to different
648	 * workqueues to avoid possible deadlock, e.g. in sync-probing mode,
649	 * NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() ->
650	 * rtnl_lock(), and causes deadlock: the former gets the rtnl_lock
651	 * and waits for all the sub-channels to appear, but the latter
652	 * can't get the rtnl_lock and this blocks the handling of
653	 * sub-channels.
654	 */
655	INIT_WORK(&newchannel->add_channel_work, vmbus_add_channel_work);
656	wq = fnew ? vmbus_connection.handle_primary_chan_wq :
657		    vmbus_connection.handle_sub_chan_wq;
658	queue_work(wq, &newchannel->add_channel_work);
659}
660
661/*
662 * We use this state to statically distribute the channel interrupt load.
663 */
664static int next_numa_node_id;
665
666/*
667 * Starting with Win8, we can statically distribute the incoming
668 * channel interrupt load by binding a channel to VCPU.
669 *
670 * For pre-win8 hosts or non-performance critical channels we assign the
671 * VMBUS_CONNECT_CPU.
672 *
673 * Starting with win8, performance critical channels will be distributed
674 * evenly among all the available NUMA nodes.  Once the node is assigned,
675 * we will assign the CPU based on a simple round robin scheme.
676 */
677static void init_vp_index(struct vmbus_channel *channel)
678{
679	bool perf_chn = hv_is_perf_channel(channel);
680	cpumask_var_t available_mask;
681	struct cpumask *alloced_mask;
682	u32 target_cpu;
683	int numa_node;
684
685	if ((vmbus_proto_version == VERSION_WS2008) ||
686	    (vmbus_proto_version == VERSION_WIN7) || (!perf_chn) ||
687	    !alloc_cpumask_var(&available_mask, GFP_KERNEL)) {
688		/*
689		 * Prior to win8, all channel interrupts are
690		 * delivered on VMBUS_CONNECT_CPU.
691		 * Also if the channel is not a performance critical
692		 * channel, bind it to VMBUS_CONNECT_CPU.
693		 * In case alloc_cpumask_var() fails, bind it to
694		 * VMBUS_CONNECT_CPU.
695		 */
696		channel->target_cpu = VMBUS_CONNECT_CPU;
697		if (perf_chn)
698			hv_set_alloced_cpu(VMBUS_CONNECT_CPU);
699		return;
700	}
701
702	while (true) {
703		numa_node = next_numa_node_id++;
704		if (numa_node == nr_node_ids) {
705			next_numa_node_id = 0;
706			continue;
707		}
708		if (cpumask_empty(cpumask_of_node(numa_node)))
709			continue;
710		break;
711	}
712	alloced_mask = &hv_context.hv_numa_map[numa_node];
713
714	if (cpumask_weight(alloced_mask) ==
715	    cpumask_weight(cpumask_of_node(numa_node))) {
716		/*
717		 * We have cycled through all the CPUs in the node;
718		 * reset the alloced map.
719		 */
720		cpumask_clear(alloced_mask);
721	}
722
723	cpumask_xor(available_mask, alloced_mask, cpumask_of_node(numa_node));
724
725	target_cpu = cpumask_first(available_mask);
726	cpumask_set_cpu(target_cpu, alloced_mask);
727
728	channel->target_cpu = target_cpu;
729
730	free_cpumask_var(available_mask);
731}
732
733#define UNLOAD_DELAY_UNIT_MS	10		/* 10 milliseconds */
734#define UNLOAD_WAIT_MS		(100*1000)	/* 100 seconds */
735#define UNLOAD_WAIT_LOOPS	(UNLOAD_WAIT_MS/UNLOAD_DELAY_UNIT_MS)
736#define UNLOAD_MSG_MS		(5*1000)	/* Every 5 seconds */
737#define UNLOAD_MSG_LOOPS	(UNLOAD_MSG_MS/UNLOAD_DELAY_UNIT_MS)
738
739static void vmbus_wait_for_unload(void)
740{
741	int cpu;
742	void *page_addr;
743	struct hv_message *msg;
744	struct vmbus_channel_message_header *hdr;
745	u32 message_type, i;
746
747	/*
748	 * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was
749	 * used for initial contact or to CPU0 depending on host version. When
750	 * we're crashing on a different CPU let's hope that IRQ handler on
751	 * the cpu which receives CHANNELMSG_UNLOAD_RESPONSE is still
752	 * functional and vmbus_unload_response() will complete
753	 * vmbus_connection.unload_event. If not, the last thing we can do is
754	 * read message pages for all CPUs directly.
755	 *
756	 * Wait up to 100 seconds since an Azure host must writeback any dirty
757	 * data in its disk cache before the VMbus UNLOAD request will
758	 * complete. This flushing has been empirically observed to take up
759	 * to 50 seconds in cases with a lot of dirty data, so allow additional
760	 * leeway and for inaccuracies in mdelay(). But eventually time out so
761	 * that the panic path can't get hung forever in case the response
762	 * message isn't seen.
763	 */
764	for (i = 1; i <= UNLOAD_WAIT_LOOPS; i++) {
765		if (completion_done(&vmbus_connection.unload_event))
766			goto completed;
767
768		for_each_present_cpu(cpu) {
769			struct hv_per_cpu_context *hv_cpu
770				= per_cpu_ptr(hv_context.cpu_context, cpu);
771
772			/*
773			 * In a CoCo VM the synic_message_page is not allocated
774			 * in hv_synic_alloc(). Instead it is set/cleared in
775			 * hv_synic_enable_regs() and hv_synic_disable_regs()
776			 * such that it is set only when the CPU is online. If
777			 * not all present CPUs are online, the message page
778			 * might be NULL, so skip such CPUs.
779			 */
780			page_addr = hv_cpu->synic_message_page;
781			if (!page_addr)
782				continue;
783
784			msg = (struct hv_message *)page_addr
785				+ VMBUS_MESSAGE_SINT;
786
787			message_type = READ_ONCE(msg->header.message_type);
788			if (message_type == HVMSG_NONE)
789				continue;
790
791			hdr = (struct vmbus_channel_message_header *)
792				msg->u.payload;
793
794			if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
795				complete(&vmbus_connection.unload_event);
796
797			vmbus_signal_eom(msg, message_type);
798		}
799
800		/*
801		 * Give a notice periodically so someone watching the
802		 * serial output won't think it is completely hung.
803		 */
804		if (!(i % UNLOAD_MSG_LOOPS))
805			pr_notice("Waiting for VMBus UNLOAD to complete\n");
806
807		mdelay(UNLOAD_DELAY_UNIT_MS);
808	}
809	pr_err("Continuing even though VMBus UNLOAD did not complete\n");
810
811completed:
812	/*
813	 * We're crashing and already got the UNLOAD_RESPONSE, cleanup all
814	 * maybe-pending messages on all CPUs to be able to receive new
815	 * messages after we reconnect.
816	 */
817	for_each_present_cpu(cpu) {
818		struct hv_per_cpu_context *hv_cpu
819			= per_cpu_ptr(hv_context.cpu_context, cpu);
820
821		page_addr = hv_cpu->synic_message_page;
822		if (!page_addr)
823			continue;
824
825		msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
826		msg->header.message_type = HVMSG_NONE;
827	}
828}
829
830/*
831 * vmbus_unload_response - Handler for the unload response.
832 */
833static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
834{
835	/*
836	 * This is a global event; just wake up the waiting thread.
837	 * Once we successfully unload, we can cleanup the monitor state.
838	 */
839	complete(&vmbus_connection.unload_event);
840}
841
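/*
 * Send CHANNELMSG_UNLOAD to the host and wait for CHANNELMSG_UNLOAD_RESPONSE.
 * In the crash path we cannot sleep on the completion, so poll the SynIC
 * message pages via vmbus_wait_for_unload() instead.
 */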
842void vmbus_initiate_unload(bool crash)
843{
844	struct vmbus_channel_message_header hdr;
845
846	if (xchg(&vmbus_connection.conn_state, DISCONNECTED) == DISCONNECTED)
847		return;
848
849	/* Pre-Win2012R2 hosts don't support reconnect */
850	if (vmbus_proto_version < VERSION_WIN8_1)
851		return;
852
853	init_completion(&vmbus_connection.unload_event);
854	memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
855	hdr.msgtype = CHANNELMSG_UNLOAD;
856	vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header),
857		       !crash);
858
859	/*
860	 * vmbus_initiate_unload() is also called on crash and the crash can be
861	 * happening in an interrupt context, where scheduling is impossible.
862	 */
863	if (!crash)
864		wait_for_completion(&vmbus_connection.unload_event);
865	else
866		vmbus_wait_for_unload();
867}
868
869static void check_ready_for_resume_event(void)
870{
871	/*
872	 * If all the old primary channels have been fixed up, then it's safe
873	 * to resume.
874	 */
875	if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume))
876		complete(&vmbus_connection.ready_for_resume_event);
877}
878
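/*
 * Copy the offer into the channel and derive the signalling parameters
 * (sig_event, monitor group/bit) and the device type from it.
 */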
879static void vmbus_setup_channel_state(struct vmbus_channel *channel,
880				      struct vmbus_channel_offer_channel *offer)
881{
882	/*
883	 * Setup state for signalling the host.
884	 */
885	channel->sig_event = VMBUS_EVENT_CONNECTION_ID;
886
887	if (vmbus_proto_version != VERSION_WS2008) {
888		channel->is_dedicated_interrupt =
889				(offer->is_dedicated_interrupt != 0);
890		channel->sig_event = offer->connection_id;
891	}
892
893	memcpy(&channel->offermsg, offer,
894	       sizeof(struct vmbus_channel_offer_channel));
895	channel->monitor_grp = (u8)offer->monitorid / 32;
896	channel->monitor_bit = (u8)offer->monitorid % 32;
897	channel->device_id = hv_get_dev_type(channel);
898}
899
900/*
901 * find_primary_channel_by_offer - Get the channel object given the new offer.
902 * This is only used in the resume path of hibernation.
903 */
904static struct vmbus_channel *
905find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer)
906{
907	struct vmbus_channel *channel = NULL, *iter;
908	const guid_t *inst1, *inst2;
909
910	/* Ignore sub-channel offers. */
911	if (offer->offer.sub_channel_index != 0)
912		return NULL;
913
914	mutex_lock(&vmbus_connection.channel_mutex);
915
916	list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) {
917		inst1 = &iter->offermsg.offer.if_instance;
918		inst2 = &offer->offer.if_instance;
919
920		if (guid_equal(inst1, inst2)) {
921			channel = iter;
922			break;
923		}
924	}
925
926	mutex_unlock(&vmbus_connection.channel_mutex);
927
928	return channel;
929}
930
931/*
932 * vmbus_onoffer - Handler for channel offers from the parent partition.
934 */
935static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
936{
937	struct vmbus_channel_offer_channel *offer;
938	struct vmbus_channel *oldchannel, *newchannel;
939	size_t offer_sz;
940
941	offer = (struct vmbus_channel_offer_channel *)hdr;
942
943	trace_vmbus_onoffer(offer);
944
945	oldchannel = find_primary_channel_by_offer(offer);
946
947	if (oldchannel != NULL) {
948		/*
949		 * We're resuming from hibernation: all the sub-channel and
950		 * hv_sock channels we had before the hibernation should have
951		 * been cleaned up, and now we must be seeing a re-offered
952		 * primary channel that we had before the hibernation.
953		 */
954
955		/*
956		 * { Initially: channel relid = INVALID_RELID,
957		 *		channels[valid_relid] = NULL }
958		 *
959		 * CPU1					CPU2
960		 *
961		 * [vmbus_onoffer()]			[vmbus_device_release()]
962		 *
963		 * LOCK channel_mutex			LOCK channel_mutex
964		 * STORE channel relid = valid_relid	LOAD r1 = channel relid
965		 * MAP_RELID channel			if (r1 != INVALID_RELID)
966		 * UNLOCK channel_mutex			  UNMAP_RELID channel
967		 *					UNLOCK channel_mutex
968		 *
969		 * Forbids: r1 == valid_relid &&
970		 * 		channels[valid_relid] == channel
971		 *
972		 * Note.  r1 can be INVALID_RELID only for an hv_sock channel.
973		 * None of the hv_sock channels which were present before the
974		 * suspend are re-offered upon the resume.  See the WARN_ON()
975		 * in hv_process_channel_removal().
976		 */
977		mutex_lock(&vmbus_connection.channel_mutex);
978
979		atomic_dec(&vmbus_connection.offer_in_progress);
980
981		WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
982		/* Fix up the relid. */
983		oldchannel->offermsg.child_relid = offer->child_relid;
984
985		offer_sz = sizeof(*offer);
986		if (memcmp(offer, &oldchannel->offermsg, offer_sz) != 0) {
987			/*
988			 * This is not an error, since the host can also change
989			 * the other field(s) of the offer, e.g. on WS RS5
990			 * (Build 17763), the offer->connection_id of the
991			 * Mellanox VF vmbus device can change when the host
992			 * reoffers the device upon resume.
993			 */
994			pr_debug("vmbus offer changed: relid=%d\n",
995				 offer->child_relid);
996
997			print_hex_dump_debug("Old vmbus offer: ",
998					     DUMP_PREFIX_OFFSET, 16, 4,
999					     &oldchannel->offermsg, offer_sz,
1000					     false);
1001			print_hex_dump_debug("New vmbus offer: ",
1002					     DUMP_PREFIX_OFFSET, 16, 4,
1003					     offer, offer_sz, false);
1004
1005			/* Fix up the old channel. */
1006			vmbus_setup_channel_state(oldchannel, offer);
1007		}
1008
1009		/* Add the channel back to the array of channels. */
1010		vmbus_channel_map_relid(oldchannel);
1011		check_ready_for_resume_event();
1012
1013		mutex_unlock(&vmbus_connection.channel_mutex);
1014		return;
1015	}
1016
1017	/* Allocate the channel object and save this offer. */
1018	newchannel = alloc_channel();
1019	if (!newchannel) {
1020		vmbus_release_relid(offer->child_relid);
1021		atomic_dec(&vmbus_connection.offer_in_progress);
1022		pr_err("Unable to allocate channel object\n");
1023		return;
1024	}
1025
1026	vmbus_setup_channel_state(newchannel, offer);
1027
1028	vmbus_process_offer(newchannel);
1029}
1030
1031static void check_ready_for_suspend_event(void)
1032{
1033	/*
1034	 * If all the sub-channels or hv_sock channels have been cleaned up,
1035	 * then it's safe to suspend.
1036	 */
1037	if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend))
1038		complete(&vmbus_connection.ready_for_suspend_event);
1039}
1040
1041/*
1042 * vmbus_onoffer_rescind - Rescind offer handler.
1043 *
1044 * We queue a work item to process this offer synchronously
1045 */
1046static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
1047{
1048	struct vmbus_channel_rescind_offer *rescind;
1049	struct vmbus_channel *channel;
1050	struct device *dev;
1051	bool clean_up_chan_for_suspend;
1052
1053	rescind = (struct vmbus_channel_rescind_offer *)hdr;
1054
1055	trace_vmbus_onoffer_rescind(rescind);
1056
1057	/*
1058	 * The offer msg and the corresponding rescind msg
1059	 * from the host are guaranteed to be ordered -
1060	 * offer comes in first and then the rescind.
1061	 * Since we process these events in work elements,
1062	 * and with preemption, we may end up processing
1063	 * the events out of order.  We rely on the synchronization
1064	 * provided by offer_in_progress and by channel_mutex for
1065	 * ordering these events:
1066	 *
1067	 * { Initially: offer_in_progress = 1 }
1068	 *
1069	 * CPU1				CPU2
1070	 *
1071	 * [vmbus_onoffer()]		[vmbus_onoffer_rescind()]
1072	 *
1073	 * LOCK channel_mutex		WAIT_ON offer_in_progress == 0
1074	 * DECREMENT offer_in_progress	LOCK channel_mutex
1075	 * STORE channels[]		LOAD channels[]
1076	 * UNLOCK channel_mutex		UNLOCK channel_mutex
1077	 *
1078	 * Forbids: CPU2's LOAD from *not* seeing CPU1's STORE
1079	 */
1080
1081	while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
1082		/*
1083		 * We wait here while any channel offer is still being
1084		 * processed.
1085		 */
1086		msleep(1);
1087	}
1088
1089	mutex_lock(&vmbus_connection.channel_mutex);
1090	channel = relid2channel(rescind->child_relid);
1091	mutex_unlock(&vmbus_connection.channel_mutex);
1092
1093	if (channel == NULL) {
1094		/*
1095		 * We failed in processing the offer message;
1096		 * we would have cleaned up the relid in that
1097		 * failure path.
1098		 */
1099		return;
1100	}
1101
1102	clean_up_chan_for_suspend = is_hvsock_channel(channel) ||
1103				    is_sub_channel(channel);
1104	/*
1105	 * Before setting channel->rescind in vmbus_rescind_cleanup(), we
1106	 * should make sure the channel callback is not running any more.
1107	 */
1108	vmbus_reset_channel_cb(channel);
1109
1110	/*
1111	 * Now wait for offer handling to complete.
1112	 */
1113	vmbus_rescind_cleanup(channel);
1114	while (READ_ONCE(channel->probe_done) == false) {
1115		/*
1116		 * We wait here until the offer handling for this channel
1117		 * (device probing) has completed.
1118		 */
1119		msleep(1);
1120	}
1121
1122	/*
1123	 * At this point, the rescind handling can proceed safely.
1124	 */
1125
1126	if (channel->device_obj) {
1127		if (channel->chn_rescind_callback) {
1128			channel->chn_rescind_callback(channel);
1129
1130			if (clean_up_chan_for_suspend)
1131				check_ready_for_suspend_event();
1132
1133			return;
1134		}
1135		/*
1136		 * We will have to unregister this device from the
1137		 * driver core.
1138		 */
1139		dev = get_device(&channel->device_obj->device);
1140		if (dev) {
1141			vmbus_device_unregister(channel->device_obj);
1142			put_device(dev);
1143		}
1144	} else if (channel->primary_channel != NULL) {
1145		/*
1146		 * Sub-channel is being rescinded. Following is the channel
1147		 * close sequence when initiated from the driver (refer to
1148		 * vmbus_close() for details):
1149		 * 1. Close all sub-channels first
1150		 * 2. Then close the primary channel.
1151		 */
1152		mutex_lock(&vmbus_connection.channel_mutex);
1153		if (channel->state == CHANNEL_OPEN_STATE) {
1154			/*
1155			 * The channel has not been opened by a driver yet,
1156			 * so it is safe for us to clean up the channel.
1157			 */
1158			hv_process_channel_removal(channel);
1159		} else {
1160			complete(&channel->rescind_event);
1161		}
1162		mutex_unlock(&vmbus_connection.channel_mutex);
1163	}
1164
1165	/* The "channel" may have been freed. Do not access it any longer. */
1166
1167	if (clean_up_chan_for_suspend)
1168		check_ready_for_suspend_event();
1169}
1170
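/*
 * Used by the hv_sock transport to tear down a connection: wait until the
 * channel has been both fully set up (probe_done) and rescinded by the host,
 * then unregister the device.
 */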
1171void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
1172{
1173	BUG_ON(!is_hvsock_channel(channel));
1174
1175	/* We always get a rescind msg when a connection is closed. */
1176	while (!READ_ONCE(channel->probe_done) || !READ_ONCE(channel->rescind))
1177		msleep(1);
1178
1179	vmbus_device_unregister(channel->device_obj);
1180}
1181EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);
1182
1184/*
1185 * vmbus_onoffers_delivered -
1186 * This is invoked when all offers have been delivered.
1187 *
1188 * Nothing to do here.
1189 */
1190static void vmbus_onoffers_delivered(
1191			struct vmbus_channel_message_header *hdr)
1192{
1193}
1194
1195/*
1196 * vmbus_onopen_result - Open result handler.
1197 *
1198 * This is invoked when we received a response to our channel open request.
1199 * Find the matching request, copy the response and signal the requesting
1200 * thread.
1201 */
1202static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
1203{
1204	struct vmbus_channel_open_result *result;
1205	struct vmbus_channel_msginfo *msginfo;
1206	struct vmbus_channel_message_header *requestheader;
1207	struct vmbus_channel_open_channel *openmsg;
1208	unsigned long flags;
1209
1210	result = (struct vmbus_channel_open_result *)hdr;
1211
1212	trace_vmbus_onopen_result(result);
1213
1214	/*
1215	 * Find the open msg, copy the result and signal/unblock the wait event
1216	 */
1217	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
1218
1219	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
1220				msglistentry) {
1221		requestheader =
1222			(struct vmbus_channel_message_header *)msginfo->msg;
1223
1224		if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
1225			openmsg =
1226			(struct vmbus_channel_open_channel *)msginfo->msg;
1227			if (openmsg->child_relid == result->child_relid &&
1228			    openmsg->openid == result->openid) {
1229				memcpy(&msginfo->response.open_result,
1230				       result,
1231				       sizeof(
1232					struct vmbus_channel_open_result));
1233				complete(&msginfo->waitevent);
1234				break;
1235			}
1236		}
1237	}
1238	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
1239}
1240
1241/*
1242 * vmbus_ongpadl_created - GPADL created handler.
1243 *
1244 * This is invoked when we received a response to our gpadl create request.
1245 * Find the matching request, copy the response and signal the requesting
1246 * thread.
1247 */
1248static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
1249{
1250	struct vmbus_channel_gpadl_created *gpadlcreated;
1251	struct vmbus_channel_msginfo *msginfo;
1252	struct vmbus_channel_message_header *requestheader;
1253	struct vmbus_channel_gpadl_header *gpadlheader;
1254	unsigned long flags;
1255
1256	gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;
1257
1258	trace_vmbus_ongpadl_created(gpadlcreated);
1259
1260	/*
1261	 * Find the establish msg, copy the result and signal/unblock the wait
1262	 * event
1263	 */
1264	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
1265
1266	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
1267				msglistentry) {
1268		requestheader =
1269			(struct vmbus_channel_message_header *)msginfo->msg;
1270
1271		if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
1272			gpadlheader =
1273			(struct vmbus_channel_gpadl_header *)requestheader;
1274
1275			if ((gpadlcreated->child_relid ==
1276			     gpadlheader->child_relid) &&
1277			    (gpadlcreated->gpadl == gpadlheader->gpadl)) {
1278				memcpy(&msginfo->response.gpadl_created,
1279				       gpadlcreated,
1280				       sizeof(
1281					struct vmbus_channel_gpadl_created));
1282				complete(&msginfo->waitevent);
1283				break;
1284			}
1285		}
1286	}
1287	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
1288}
1289
1290/*
1291 * vmbus_ongpadl_torndown - GPADL torndown handler.
1292 *
1293 * This is invoked when we received a response to our gpadl teardown request.
1294 * Find the matching request, copy the response and signal the requesting
1295 * thread.
1296 */
1297static void vmbus_ongpadl_torndown(
1298			struct vmbus_channel_message_header *hdr)
1299{
1300	struct vmbus_channel_gpadl_torndown *gpadl_torndown;
1301	struct vmbus_channel_msginfo *msginfo;
1302	struct vmbus_channel_message_header *requestheader;
1303	struct vmbus_channel_gpadl_teardown *gpadl_teardown;
1304	unsigned long flags;
1305
1306	gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;
1307
1308	trace_vmbus_ongpadl_torndown(gpadl_torndown);
1309
1310	/*
1311	 * Find the open msg, copy the result and signal/unblock the wait event
1312	 */
1313	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
1314
1315	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
1316				msglistentry) {
1317		requestheader =
1318			(struct vmbus_channel_message_header *)msginfo->msg;
1319
1320		if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
1321			gpadl_teardown =
1322			(struct vmbus_channel_gpadl_teardown *)requestheader;
1323
1324			if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
1325				memcpy(&msginfo->response.gpadl_torndown,
1326				       gpadl_torndown,
1327				       sizeof(
1328					struct vmbus_channel_gpadl_torndown));
1329				complete(&msginfo->waitevent);
1330				break;
1331			}
1332		}
1333	}
1334	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
1335}
1336
1337/*
1338 * vmbus_onversion_response - Version response handler
1339 *
1340 * This is invoked when we received a response to our initiate contact request.
1341 * Find the matching request, copy the response and signal the requesting
1342 * thread.
1343 */
1344static void vmbus_onversion_response(
1345		struct vmbus_channel_message_header *hdr)
1346{
1347	struct vmbus_channel_msginfo *msginfo;
1348	struct vmbus_channel_message_header *requestheader;
1349	struct vmbus_channel_version_response *version_response;
1350	unsigned long flags;
1351
1352	version_response = (struct vmbus_channel_version_response *)hdr;
1353
1354	trace_vmbus_onversion_response(version_response);
1355
1356	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
1357
1358	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
1359				msglistentry) {
1360		requestheader =
1361			(struct vmbus_channel_message_header *)msginfo->msg;
1362
1363		if (requestheader->msgtype ==
1364		    CHANNELMSG_INITIATE_CONTACT) {
1365			memcpy(&msginfo->response.version_response,
1366			      version_response,
1367			      sizeof(struct vmbus_channel_version_response));
1368			complete(&msginfo->waitevent);
1369		}
1370	}
1371	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
1372}
1373
1374/* Channel message dispatch table */
1375const struct vmbus_channel_message_table_entry
1376channel_message_table[CHANNELMSG_COUNT] = {
1377	{ CHANNELMSG_INVALID,			0, NULL, 0},
1378	{ CHANNELMSG_OFFERCHANNEL,		0, vmbus_onoffer,
1379		sizeof(struct vmbus_channel_offer_channel)},
1380	{ CHANNELMSG_RESCIND_CHANNELOFFER,	0, vmbus_onoffer_rescind,
1381		sizeof(struct vmbus_channel_rescind_offer) },
1382	{ CHANNELMSG_REQUESTOFFERS,		0, NULL, 0},
1383	{ CHANNELMSG_ALLOFFERS_DELIVERED,	1, vmbus_onoffers_delivered, 0},
1384	{ CHANNELMSG_OPENCHANNEL,		0, NULL, 0},
1385	{ CHANNELMSG_OPENCHANNEL_RESULT,	1, vmbus_onopen_result,
1386		sizeof(struct vmbus_channel_open_result)},
1387	{ CHANNELMSG_CLOSECHANNEL,		0, NULL, 0},
1388	{ CHANNELMSG_GPADL_HEADER,		0, NULL, 0},
1389	{ CHANNELMSG_GPADL_BODY,		0, NULL, 0},
1390	{ CHANNELMSG_GPADL_CREATED,		1, vmbus_ongpadl_created,
1391		sizeof(struct vmbus_channel_gpadl_created)},
1392	{ CHANNELMSG_GPADL_TEARDOWN,		0, NULL, 0},
1393	{ CHANNELMSG_GPADL_TORNDOWN,		1, vmbus_ongpadl_torndown,
1394		sizeof(struct vmbus_channel_gpadl_torndown) },
1395	{ CHANNELMSG_RELID_RELEASED,		0, NULL, 0},
1396	{ CHANNELMSG_INITIATE_CONTACT,		0, NULL, 0},
1397	{ CHANNELMSG_VERSION_RESPONSE,		1, vmbus_onversion_response,
1398		sizeof(struct vmbus_channel_version_response)},
1399	{ CHANNELMSG_UNLOAD,			0, NULL, 0},
1400	{ CHANNELMSG_UNLOAD_RESPONSE,		1, vmbus_unload_response, 0},
1401	{ CHANNELMSG_18,			0, NULL, 0},
1402	{ CHANNELMSG_19,			0, NULL, 0},
1403	{ CHANNELMSG_20,			0, NULL, 0},
1404	{ CHANNELMSG_TL_CONNECT_REQUEST,	0, NULL, 0},
1405	{ CHANNELMSG_MODIFYCHANNEL,		0, NULL, 0},
1406	{ CHANNELMSG_TL_CONNECT_RESULT,		0, NULL, 0},
1407};
1408
1409/*
1410 * vmbus_onmessage - Handler for channel protocol messages.
1411 *
1412 * This is invoked in the vmbus worker thread context.
1413 */
1414void vmbus_onmessage(struct vmbus_channel_message_header *hdr)
1415{
1416	trace_vmbus_on_message(hdr);
1417
1418	/*
1419	 * vmbus_on_msg_dpc() makes sure the hdr->msgtype here cannot go
1420	 * out of bounds and the message_handler pointer cannot be NULL.
1421	 */
1422	channel_message_table[hdr->msgtype].message_handler(hdr);
1423}
1424
1425/*
1426 * vmbus_request_offers - Send a request to get all our pending offers.
1427 */
1428int vmbus_request_offers(void)
1429{
1430	struct vmbus_channel_message_header *msg;
1431	struct vmbus_channel_msginfo *msginfo;
1432	int ret;
1433
1434	msginfo = kmalloc(sizeof(*msginfo) +
1435			  sizeof(struct vmbus_channel_message_header),
1436			  GFP_KERNEL);
1437	if (!msginfo)
1438		return -ENOMEM;
1439
1440	msg = (struct vmbus_channel_message_header *)msginfo->msg;
1441
1442	msg->msgtype = CHANNELMSG_REQUESTOFFERS;
1443
1444	ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header),
1445			     true);
1446
1447	trace_vmbus_request_offers(ret);
1448
1449	if (ret != 0) {
1450		pr_err("Unable to request offers - %d\n", ret);
1451
1452		goto cleanup;
1453	}
1454
1455cleanup:
1456	kfree(msginfo);
1457
1458	return ret;
1459}
1460
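/* Run the sub-channel creation callback for every existing sub-channel. */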
1461static void invoke_sc_cb(struct vmbus_channel *primary_channel)
1462{
1463	struct list_head *cur, *tmp;
1464	struct vmbus_channel *cur_channel;
1465
1466	if (primary_channel->sc_creation_callback == NULL)
1467		return;
1468
1469	list_for_each_safe(cur, tmp, &primary_channel->sc_list) {
1470		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
1471
1472		primary_channel->sc_creation_callback(cur_channel);
1473	}
1474}
1475
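/*
 * Register the callback invoked whenever a new sub-channel is offered for
 * this primary channel; see vmbus_add_channel_work().
 */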
1476void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
1477				void (*sc_cr_cb)(struct vmbus_channel *new_sc))
1478{
1479	primary_channel->sc_creation_callback = sc_cr_cb;
1480}
1481EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);
1482
1483bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
1484{
1485	bool ret;
1486
1487	ret = !list_empty(&primary->sc_list);
1488
1489	if (ret) {
1490		/*
1491		 * Invoke the callback on sub-channel creation.
1492		 * This will present a uniform interface to the
1493		 * clients.
1494		 */
1495		invoke_sc_cb(primary);
1496	}
1497
1498	return ret;
1499}
1500EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);
1501
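/*
 * Register the callback invoked when the host rescinds the channel; see
 * vmbus_onoffer_rescind().
 */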
1502void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
1503		void (*chn_rescind_cb)(struct vmbus_channel *))
1504{
1505	channel->chn_rescind_callback = chn_rescind_cb;
1506}
1507EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback);
1508