1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * Copyright (c) 2009, Microsoft Corporation. 4 * 5 * Authors: 6 * Haiyang Zhang <haiyangz@microsoft.com> 7 * Hank Janssen <hjanssen@microsoft.com> 8 * K. Y. Srinivasan <kys@microsoft.com> 9 */ 10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 11 12#include <linux/init.h> 13#include <linux/module.h> 14#include <linux/device.h> 15#include <linux/interrupt.h> 16#include <linux/sysctl.h> 17#include <linux/slab.h> 18#include <linux/acpi.h> 19#include <linux/completion.h> 20#include <linux/hyperv.h> 21#include <linux/kernel_stat.h> 22#include <linux/clockchips.h> 23#include <linux/cpu.h> 24#include <linux/sched/task_stack.h> 25 26#include <linux/delay.h> 27#include <linux/notifier.h> 28#include <linux/ptrace.h> 29#include <linux/screen_info.h> 30#include <linux/kdebug.h> 31#include <linux/efi.h> 32#include <linux/random.h> 33#include <linux/kernel.h> 34#include <linux/syscore_ops.h> 35#include <clocksource/hyperv_timer.h> 36#include "hyperv_vmbus.h" 37 38struct vmbus_dynid { 39 struct list_head node; 40 struct hv_vmbus_device_id id; 41}; 42 43static struct acpi_device *hv_acpi_dev; 44 45static struct completion probe_event; 46 47static int hyperv_cpuhp_online; 48 49static void *hv_panic_page; 50 51/* Values parsed from ACPI DSDT */ 52static int vmbus_irq; 53int vmbus_interrupt; 54 55/* 56 * Boolean to control whether to report panic messages over Hyper-V. 57 * 58 * It can be set via /proc/sys/kernel/hyperv/record_panic_msg 59 */ 60static int sysctl_record_panic_msg = 1; 61 62static int hyperv_report_reg(void) 63{ 64 return !sysctl_record_panic_msg || !hv_panic_page; 65} 66 67static int hyperv_panic_event(struct notifier_block *nb, unsigned long val, 68 void *args) 69{ 70 struct pt_regs *regs; 71 72 vmbus_initiate_unload(true); 73 74 /* 75 * Hyper-V should be notified only once about a panic. If we will be 76 * doing hyperv_report_panic_msg() later with kmsg data, don't do 77 * the notification here. 78 */ 79 if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE 80 && hyperv_report_reg()) { 81 regs = current_pt_regs(); 82 hyperv_report_panic(regs, val, false); 83 } 84 return NOTIFY_DONE; 85} 86 87static int hyperv_die_event(struct notifier_block *nb, unsigned long val, 88 void *args) 89{ 90 struct die_args *die = args; 91 struct pt_regs *regs = die->regs; 92 93 /* Don't notify Hyper-V if the die event is other than oops */ 94 if (val != DIE_OOPS) 95 return NOTIFY_DONE; 96 97 /* 98 * Hyper-V should be notified only once about a panic. If we will be 99 * doing hyperv_report_panic_msg() later with kmsg data, don't do 100 * the notification here. 101 */ 102 if (hyperv_report_reg()) 103 hyperv_report_panic(regs, val, true); 104 return NOTIFY_DONE; 105} 106 107static struct notifier_block hyperv_die_block = { 108 .notifier_call = hyperv_die_event, 109}; 110static struct notifier_block hyperv_panic_block = { 111 .notifier_call = hyperv_panic_event, 112}; 113 114static const char *fb_mmio_name = "fb_range"; 115static struct resource *fb_mmio; 116static struct resource *hyperv_mmio; 117static DEFINE_MUTEX(hyperv_mmio_lock); 118 119static int vmbus_exists(void) 120{ 121 if (hv_acpi_dev == NULL) 122 return -ENODEV; 123 124 return 0; 125} 126 127static u8 channel_monitor_group(const struct vmbus_channel *channel) 128{ 129 return (u8)channel->offermsg.monitorid / 32; 130} 131 132static u8 channel_monitor_offset(const struct vmbus_channel *channel) 133{ 134 return (u8)channel->offermsg.monitorid % 32; 135} 136 137static u32 channel_pending(const struct vmbus_channel *channel, 138 const struct hv_monitor_page *monitor_page) 139{ 140 u8 monitor_group = channel_monitor_group(channel); 141 142 return monitor_page->trigger_group[monitor_group].pending; 143} 144 145static u32 channel_latency(const struct vmbus_channel *channel, 146 const struct hv_monitor_page *monitor_page) 147{ 148 u8 monitor_group = channel_monitor_group(channel); 149 u8 monitor_offset = channel_monitor_offset(channel); 150 151 return monitor_page->latency[monitor_group][monitor_offset]; 152} 153 154static u32 channel_conn_id(struct vmbus_channel *channel, 155 struct hv_monitor_page *monitor_page) 156{ 157 u8 monitor_group = channel_monitor_group(channel); 158 u8 monitor_offset = channel_monitor_offset(channel); 159 return monitor_page->parameter[monitor_group][monitor_offset].connectionid.u.id; 160} 161 162static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr, 163 char *buf) 164{ 165 struct hv_device *hv_dev = device_to_hv_device(dev); 166 167 if (!hv_dev->channel) 168 return -ENODEV; 169 return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid); 170} 171static DEVICE_ATTR_RO(id); 172 173static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr, 174 char *buf) 175{ 176 struct hv_device *hv_dev = device_to_hv_device(dev); 177 178 if (!hv_dev->channel) 179 return -ENODEV; 180 return sprintf(buf, "%d\n", hv_dev->channel->state); 181} 182static DEVICE_ATTR_RO(state); 183 184static ssize_t monitor_id_show(struct device *dev, 185 struct device_attribute *dev_attr, char *buf) 186{ 187 struct hv_device *hv_dev = device_to_hv_device(dev); 188 189 if (!hv_dev->channel) 190 return -ENODEV; 191 return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid); 192} 193static DEVICE_ATTR_RO(monitor_id); 194 195static ssize_t class_id_show(struct device *dev, 196 struct device_attribute *dev_attr, char *buf) 197{ 198 struct hv_device *hv_dev = device_to_hv_device(dev); 199 200 if (!hv_dev->channel) 201 return -ENODEV; 202 return sprintf(buf, "{%pUl}\n", 203 &hv_dev->channel->offermsg.offer.if_type); 204} 205static DEVICE_ATTR_RO(class_id); 206 207static ssize_t device_id_show(struct device *dev, 208 struct device_attribute *dev_attr, char *buf) 209{ 210 struct hv_device *hv_dev = device_to_hv_device(dev); 211 212 if (!hv_dev->channel) 213 return -ENODEV; 214 return sprintf(buf, "{%pUl}\n", 215 &hv_dev->channel->offermsg.offer.if_instance); 216} 217static DEVICE_ATTR_RO(device_id); 218 219static ssize_t modalias_show(struct device *dev, 220 struct device_attribute *dev_attr, char *buf) 221{ 222 struct hv_device *hv_dev = device_to_hv_device(dev); 223 224 return sprintf(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type); 225} 226static DEVICE_ATTR_RO(modalias); 227 228#ifdef CONFIG_NUMA 229static ssize_t numa_node_show(struct device *dev, 230 struct device_attribute *attr, char *buf) 231{ 232 struct hv_device *hv_dev = device_to_hv_device(dev); 233 234 if (!hv_dev->channel) 235 return -ENODEV; 236 237 return sprintf(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu)); 238} 239static DEVICE_ATTR_RO(numa_node); 240#endif 241 242static ssize_t server_monitor_pending_show(struct device *dev, 243 struct device_attribute *dev_attr, 244 char *buf) 245{ 246 struct hv_device *hv_dev = device_to_hv_device(dev); 247 248 if (!hv_dev->channel) 249 return -ENODEV; 250 return sprintf(buf, "%d\n", 251 channel_pending(hv_dev->channel, 252 vmbus_connection.monitor_pages[0])); 253} 254static DEVICE_ATTR_RO(server_monitor_pending); 255 256static ssize_t client_monitor_pending_show(struct device *dev, 257 struct device_attribute *dev_attr, 258 char *buf) 259{ 260 struct hv_device *hv_dev = device_to_hv_device(dev); 261 262 if (!hv_dev->channel) 263 return -ENODEV; 264 return sprintf(buf, "%d\n", 265 channel_pending(hv_dev->channel, 266 vmbus_connection.monitor_pages[1])); 267} 268static DEVICE_ATTR_RO(client_monitor_pending); 269 270static ssize_t server_monitor_latency_show(struct device *dev, 271 struct device_attribute *dev_attr, 272 char *buf) 273{ 274 struct hv_device *hv_dev = device_to_hv_device(dev); 275 276 if (!hv_dev->channel) 277 return -ENODEV; 278 return sprintf(buf, "%d\n", 279 channel_latency(hv_dev->channel, 280 vmbus_connection.monitor_pages[0])); 281} 282static DEVICE_ATTR_RO(server_monitor_latency); 283 284static ssize_t client_monitor_latency_show(struct device *dev, 285 struct device_attribute *dev_attr, 286 char *buf) 287{ 288 struct hv_device *hv_dev = device_to_hv_device(dev); 289 290 if (!hv_dev->channel) 291 return -ENODEV; 292 return sprintf(buf, "%d\n", 293 channel_latency(hv_dev->channel, 294 vmbus_connection.monitor_pages[1])); 295} 296static DEVICE_ATTR_RO(client_monitor_latency); 297 298static ssize_t server_monitor_conn_id_show(struct device *dev, 299 struct device_attribute *dev_attr, 300 char *buf) 301{ 302 struct hv_device *hv_dev = device_to_hv_device(dev); 303 304 if (!hv_dev->channel) 305 return -ENODEV; 306 return sprintf(buf, "%d\n", 307 channel_conn_id(hv_dev->channel, 308 vmbus_connection.monitor_pages[0])); 309} 310static DEVICE_ATTR_RO(server_monitor_conn_id); 311 312static ssize_t client_monitor_conn_id_show(struct device *dev, 313 struct device_attribute *dev_attr, 314 char *buf) 315{ 316 struct hv_device *hv_dev = device_to_hv_device(dev); 317 318 if (!hv_dev->channel) 319 return -ENODEV; 320 return sprintf(buf, "%d\n", 321 channel_conn_id(hv_dev->channel, 322 vmbus_connection.monitor_pages[1])); 323} 324static DEVICE_ATTR_RO(client_monitor_conn_id); 325 326static ssize_t out_intr_mask_show(struct device *dev, 327 struct device_attribute *dev_attr, char *buf) 328{ 329 struct hv_device *hv_dev = device_to_hv_device(dev); 330 struct hv_ring_buffer_debug_info outbound; 331 int ret; 332 333 if (!hv_dev->channel) 334 return -ENODEV; 335 336 ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, 337 &outbound); 338 if (ret < 0) 339 return ret; 340 341 return sprintf(buf, "%d\n", outbound.current_interrupt_mask); 342} 343static DEVICE_ATTR_RO(out_intr_mask); 344 345static ssize_t out_read_index_show(struct device *dev, 346 struct device_attribute *dev_attr, char *buf) 347{ 348 struct hv_device *hv_dev = device_to_hv_device(dev); 349 struct hv_ring_buffer_debug_info outbound; 350 int ret; 351 352 if (!hv_dev->channel) 353 return -ENODEV; 354 355 ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, 356 &outbound); 357 if (ret < 0) 358 return ret; 359 return sprintf(buf, "%d\n", outbound.current_read_index); 360} 361static DEVICE_ATTR_RO(out_read_index); 362 363static ssize_t out_write_index_show(struct device *dev, 364 struct device_attribute *dev_attr, 365 char *buf) 366{ 367 struct hv_device *hv_dev = device_to_hv_device(dev); 368 struct hv_ring_buffer_debug_info outbound; 369 int ret; 370 371 if (!hv_dev->channel) 372 return -ENODEV; 373 374 ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, 375 &outbound); 376 if (ret < 0) 377 return ret; 378 return sprintf(buf, "%d\n", outbound.current_write_index); 379} 380static DEVICE_ATTR_RO(out_write_index); 381 382static ssize_t out_read_bytes_avail_show(struct device *dev, 383 struct device_attribute *dev_attr, 384 char *buf) 385{ 386 struct hv_device *hv_dev = device_to_hv_device(dev); 387 struct hv_ring_buffer_debug_info outbound; 388 int ret; 389 390 if (!hv_dev->channel) 391 return -ENODEV; 392 393 ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, 394 &outbound); 395 if (ret < 0) 396 return ret; 397 return sprintf(buf, "%d\n", outbound.bytes_avail_toread); 398} 399static DEVICE_ATTR_RO(out_read_bytes_avail); 400 401static ssize_t out_write_bytes_avail_show(struct device *dev, 402 struct device_attribute *dev_attr, 403 char *buf) 404{ 405 struct hv_device *hv_dev = device_to_hv_device(dev); 406 struct hv_ring_buffer_debug_info outbound; 407 int ret; 408 409 if (!hv_dev->channel) 410 return -ENODEV; 411 412 ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, 413 &outbound); 414 if (ret < 0) 415 return ret; 416 return sprintf(buf, "%d\n", outbound.bytes_avail_towrite); 417} 418static DEVICE_ATTR_RO(out_write_bytes_avail); 419 420static ssize_t in_intr_mask_show(struct device *dev, 421 struct device_attribute *dev_attr, char *buf) 422{ 423 struct hv_device *hv_dev = device_to_hv_device(dev); 424 struct hv_ring_buffer_debug_info inbound; 425 int ret; 426 427 if (!hv_dev->channel) 428 return -ENODEV; 429 430 ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); 431 if (ret < 0) 432 return ret; 433 434 return sprintf(buf, "%d\n", inbound.current_interrupt_mask); 435} 436static DEVICE_ATTR_RO(in_intr_mask); 437 438static ssize_t in_read_index_show(struct device *dev, 439 struct device_attribute *dev_attr, char *buf) 440{ 441 struct hv_device *hv_dev = device_to_hv_device(dev); 442 struct hv_ring_buffer_debug_info inbound; 443 int ret; 444 445 if (!hv_dev->channel) 446 return -ENODEV; 447 448 ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); 449 if (ret < 0) 450 return ret; 451 452 return sprintf(buf, "%d\n", inbound.current_read_index); 453} 454static DEVICE_ATTR_RO(in_read_index); 455 456static ssize_t in_write_index_show(struct device *dev, 457 struct device_attribute *dev_attr, char *buf) 458{ 459 struct hv_device *hv_dev = device_to_hv_device(dev); 460 struct hv_ring_buffer_debug_info inbound; 461 int ret; 462 463 if (!hv_dev->channel) 464 return -ENODEV; 465 466 ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); 467 if (ret < 0) 468 return ret; 469 470 return sprintf(buf, "%d\n", inbound.current_write_index); 471} 472static DEVICE_ATTR_RO(in_write_index); 473 474static ssize_t in_read_bytes_avail_show(struct device *dev, 475 struct device_attribute *dev_attr, 476 char *buf) 477{ 478 struct hv_device *hv_dev = device_to_hv_device(dev); 479 struct hv_ring_buffer_debug_info inbound; 480 int ret; 481 482 if (!hv_dev->channel) 483 return -ENODEV; 484 485 ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); 486 if (ret < 0) 487 return ret; 488 489 return sprintf(buf, "%d\n", inbound.bytes_avail_toread); 490} 491static DEVICE_ATTR_RO(in_read_bytes_avail); 492 493static ssize_t in_write_bytes_avail_show(struct device *dev, 494 struct device_attribute *dev_attr, 495 char *buf) 496{ 497 struct hv_device *hv_dev = device_to_hv_device(dev); 498 struct hv_ring_buffer_debug_info inbound; 499 int ret; 500 501 if (!hv_dev->channel) 502 return -ENODEV; 503 504 ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); 505 if (ret < 0) 506 return ret; 507 508 return sprintf(buf, "%d\n", inbound.bytes_avail_towrite); 509} 510static DEVICE_ATTR_RO(in_write_bytes_avail); 511 512static ssize_t channel_vp_mapping_show(struct device *dev, 513 struct device_attribute *dev_attr, 514 char *buf) 515{ 516 struct hv_device *hv_dev = device_to_hv_device(dev); 517 struct vmbus_channel *channel = hv_dev->channel, *cur_sc; 518 int buf_size = PAGE_SIZE, n_written, tot_written; 519 struct list_head *cur; 520 521 if (!channel) 522 return -ENODEV; 523 524 mutex_lock(&vmbus_connection.channel_mutex); 525 526 tot_written = snprintf(buf, buf_size, "%u:%u\n", 527 channel->offermsg.child_relid, channel->target_cpu); 528 529 list_for_each(cur, &channel->sc_list) { 530 if (tot_written >= buf_size - 1) 531 break; 532 533 cur_sc = list_entry(cur, struct vmbus_channel, sc_list); 534 n_written = scnprintf(buf + tot_written, 535 buf_size - tot_written, 536 "%u:%u\n", 537 cur_sc->offermsg.child_relid, 538 cur_sc->target_cpu); 539 tot_written += n_written; 540 } 541 542 mutex_unlock(&vmbus_connection.channel_mutex); 543 544 return tot_written; 545} 546static DEVICE_ATTR_RO(channel_vp_mapping); 547 548static ssize_t vendor_show(struct device *dev, 549 struct device_attribute *dev_attr, 550 char *buf) 551{ 552 struct hv_device *hv_dev = device_to_hv_device(dev); 553 return sprintf(buf, "0x%x\n", hv_dev->vendor_id); 554} 555static DEVICE_ATTR_RO(vendor); 556 557static ssize_t device_show(struct device *dev, 558 struct device_attribute *dev_attr, 559 char *buf) 560{ 561 struct hv_device *hv_dev = device_to_hv_device(dev); 562 return sprintf(buf, "0x%x\n", hv_dev->device_id); 563} 564static DEVICE_ATTR_RO(device); 565 566static ssize_t driver_override_store(struct device *dev, 567 struct device_attribute *attr, 568 const char *buf, size_t count) 569{ 570 struct hv_device *hv_dev = device_to_hv_device(dev); 571 char *driver_override, *old, *cp; 572 573 /* We need to keep extra room for a newline */ 574 if (count >= (PAGE_SIZE - 1)) 575 return -EINVAL; 576 577 driver_override = kstrndup(buf, count, GFP_KERNEL); 578 if (!driver_override) 579 return -ENOMEM; 580 581 cp = strchr(driver_override, '\n'); 582 if (cp) 583 *cp = '\0'; 584 585 device_lock(dev); 586 old = hv_dev->driver_override; 587 if (strlen(driver_override)) { 588 hv_dev->driver_override = driver_override; 589 } else { 590 kfree(driver_override); 591 hv_dev->driver_override = NULL; 592 } 593 device_unlock(dev); 594 595 kfree(old); 596 597 return count; 598} 599 600static ssize_t driver_override_show(struct device *dev, 601 struct device_attribute *attr, char *buf) 602{ 603 struct hv_device *hv_dev = device_to_hv_device(dev); 604 ssize_t len; 605 606 device_lock(dev); 607 len = snprintf(buf, PAGE_SIZE, "%s\n", hv_dev->driver_override); 608 device_unlock(dev); 609 610 return len; 611} 612static DEVICE_ATTR_RW(driver_override); 613 614/* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */ 615static struct attribute *vmbus_dev_attrs[] = { 616 &dev_attr_id.attr, 617 &dev_attr_state.attr, 618 &dev_attr_monitor_id.attr, 619 &dev_attr_class_id.attr, 620 &dev_attr_device_id.attr, 621 &dev_attr_modalias.attr, 622#ifdef CONFIG_NUMA 623 &dev_attr_numa_node.attr, 624#endif 625 &dev_attr_server_monitor_pending.attr, 626 &dev_attr_client_monitor_pending.attr, 627 &dev_attr_server_monitor_latency.attr, 628 &dev_attr_client_monitor_latency.attr, 629 &dev_attr_server_monitor_conn_id.attr, 630 &dev_attr_client_monitor_conn_id.attr, 631 &dev_attr_out_intr_mask.attr, 632 &dev_attr_out_read_index.attr, 633 &dev_attr_out_write_index.attr, 634 &dev_attr_out_read_bytes_avail.attr, 635 &dev_attr_out_write_bytes_avail.attr, 636 &dev_attr_in_intr_mask.attr, 637 &dev_attr_in_read_index.attr, 638 &dev_attr_in_write_index.attr, 639 &dev_attr_in_read_bytes_avail.attr, 640 &dev_attr_in_write_bytes_avail.attr, 641 &dev_attr_channel_vp_mapping.attr, 642 &dev_attr_vendor.attr, 643 &dev_attr_device.attr, 644 &dev_attr_driver_override.attr, 645 NULL, 646}; 647 648/* 649 * Device-level attribute_group callback function. Returns the permission for 650 * each attribute, and returns 0 if an attribute is not visible. 651 */ 652static umode_t vmbus_dev_attr_is_visible(struct kobject *kobj, 653 struct attribute *attr, int idx) 654{ 655 struct device *dev = kobj_to_dev(kobj); 656 const struct hv_device *hv_dev = device_to_hv_device(dev); 657 658 /* Hide the monitor attributes if the monitor mechanism is not used. */ 659 if (!hv_dev->channel->offermsg.monitor_allocated && 660 (attr == &dev_attr_monitor_id.attr || 661 attr == &dev_attr_server_monitor_pending.attr || 662 attr == &dev_attr_client_monitor_pending.attr || 663 attr == &dev_attr_server_monitor_latency.attr || 664 attr == &dev_attr_client_monitor_latency.attr || 665 attr == &dev_attr_server_monitor_conn_id.attr || 666 attr == &dev_attr_client_monitor_conn_id.attr)) 667 return 0; 668 669 return attr->mode; 670} 671 672static const struct attribute_group vmbus_dev_group = { 673 .attrs = vmbus_dev_attrs, 674 .is_visible = vmbus_dev_attr_is_visible 675}; 676__ATTRIBUTE_GROUPS(vmbus_dev); 677 678/* 679 * vmbus_uevent - add uevent for our device 680 * 681 * This routine is invoked when a device is added or removed on the vmbus to 682 * generate a uevent to udev in the userspace. The udev will then look at its 683 * rule and the uevent generated here to load the appropriate driver 684 * 685 * The alias string will be of the form vmbus:guid where guid is the string 686 * representation of the device guid (each byte of the guid will be 687 * represented with two hex characters. 688 */ 689static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env) 690{ 691 struct hv_device *dev = device_to_hv_device(device); 692 const char *format = "MODALIAS=vmbus:%*phN"; 693 694 return add_uevent_var(env, format, UUID_SIZE, &dev->dev_type); 695} 696 697static const struct hv_vmbus_device_id * 698hv_vmbus_dev_match(const struct hv_vmbus_device_id *id, const guid_t *guid) 699{ 700 if (id == NULL) 701 return NULL; /* empty device table */ 702 703 for (; !guid_is_null(&id->guid); id++) 704 if (guid_equal(&id->guid, guid)) 705 return id; 706 707 return NULL; 708} 709 710static const struct hv_vmbus_device_id * 711hv_vmbus_dynid_match(struct hv_driver *drv, const guid_t *guid) 712{ 713 const struct hv_vmbus_device_id *id = NULL; 714 struct vmbus_dynid *dynid; 715 716 spin_lock(&drv->dynids.lock); 717 list_for_each_entry(dynid, &drv->dynids.list, node) { 718 if (guid_equal(&dynid->id.guid, guid)) { 719 id = &dynid->id; 720 break; 721 } 722 } 723 spin_unlock(&drv->dynids.lock); 724 725 return id; 726} 727 728static const struct hv_vmbus_device_id vmbus_device_null; 729 730/* 731 * Return a matching hv_vmbus_device_id pointer. 732 * If there is no match, return NULL. 733 */ 734static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv, 735 struct hv_device *dev) 736{ 737 const guid_t *guid = &dev->dev_type; 738 const struct hv_vmbus_device_id *id; 739 740 /* When driver_override is set, only bind to the matching driver */ 741 if (dev->driver_override && strcmp(dev->driver_override, drv->name)) 742 return NULL; 743 744 /* Look at the dynamic ids first, before the static ones */ 745 id = hv_vmbus_dynid_match(drv, guid); 746 if (!id) 747 id = hv_vmbus_dev_match(drv->id_table, guid); 748 749 /* driver_override will always match, send a dummy id */ 750 if (!id && dev->driver_override) 751 id = &vmbus_device_null; 752 753 return id; 754} 755 756/* vmbus_add_dynid - add a new device ID to this driver and re-probe devices */ 757static int vmbus_add_dynid(struct hv_driver *drv, guid_t *guid) 758{ 759 struct vmbus_dynid *dynid; 760 761 dynid = kzalloc(sizeof(*dynid), GFP_KERNEL); 762 if (!dynid) 763 return -ENOMEM; 764 765 dynid->id.guid = *guid; 766 767 spin_lock(&drv->dynids.lock); 768 list_add_tail(&dynid->node, &drv->dynids.list); 769 spin_unlock(&drv->dynids.lock); 770 771 return driver_attach(&drv->driver); 772} 773 774static void vmbus_free_dynids(struct hv_driver *drv) 775{ 776 struct vmbus_dynid *dynid, *n; 777 778 spin_lock(&drv->dynids.lock); 779 list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) { 780 list_del(&dynid->node); 781 kfree(dynid); 782 } 783 spin_unlock(&drv->dynids.lock); 784} 785 786/* 787 * store_new_id - sysfs frontend to vmbus_add_dynid() 788 * 789 * Allow GUIDs to be added to an existing driver via sysfs. 790 */ 791static ssize_t new_id_store(struct device_driver *driver, const char *buf, 792 size_t count) 793{ 794 struct hv_driver *drv = drv_to_hv_drv(driver); 795 guid_t guid; 796 ssize_t retval; 797 798 retval = guid_parse(buf, &guid); 799 if (retval) 800 return retval; 801 802 if (hv_vmbus_dynid_match(drv, &guid)) 803 return -EEXIST; 804 805 retval = vmbus_add_dynid(drv, &guid); 806 if (retval) 807 return retval; 808 return count; 809} 810static DRIVER_ATTR_WO(new_id); 811 812/* 813 * store_remove_id - remove a PCI device ID from this driver 814 * 815 * Removes a dynamic pci device ID to this driver. 816 */ 817static ssize_t remove_id_store(struct device_driver *driver, const char *buf, 818 size_t count) 819{ 820 struct hv_driver *drv = drv_to_hv_drv(driver); 821 struct vmbus_dynid *dynid, *n; 822 guid_t guid; 823 ssize_t retval; 824 825 retval = guid_parse(buf, &guid); 826 if (retval) 827 return retval; 828 829 retval = -ENODEV; 830 spin_lock(&drv->dynids.lock); 831 list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) { 832 struct hv_vmbus_device_id *id = &dynid->id; 833 834 if (guid_equal(&id->guid, &guid)) { 835 list_del(&dynid->node); 836 kfree(dynid); 837 retval = count; 838 break; 839 } 840 } 841 spin_unlock(&drv->dynids.lock); 842 843 return retval; 844} 845static DRIVER_ATTR_WO(remove_id); 846 847static struct attribute *vmbus_drv_attrs[] = { 848 &driver_attr_new_id.attr, 849 &driver_attr_remove_id.attr, 850 NULL, 851}; 852ATTRIBUTE_GROUPS(vmbus_drv); 853 854 855/* 856 * vmbus_match - Attempt to match the specified device to the specified driver 857 */ 858static int vmbus_match(struct device *device, struct device_driver *driver) 859{ 860 struct hv_driver *drv = drv_to_hv_drv(driver); 861 struct hv_device *hv_dev = device_to_hv_device(device); 862 863 /* The hv_sock driver handles all hv_sock offers. */ 864 if (is_hvsock_channel(hv_dev->channel)) 865 return drv->hvsock; 866 867 if (hv_vmbus_get_id(drv, hv_dev)) 868 return 1; 869 870 return 0; 871} 872 873/* 874 * vmbus_probe - Add the new vmbus's child device 875 */ 876static int vmbus_probe(struct device *child_device) 877{ 878 int ret = 0; 879 struct hv_driver *drv = 880 drv_to_hv_drv(child_device->driver); 881 struct hv_device *dev = device_to_hv_device(child_device); 882 const struct hv_vmbus_device_id *dev_id; 883 884 dev_id = hv_vmbus_get_id(drv, dev); 885 if (drv->probe) { 886 ret = drv->probe(dev, dev_id); 887 if (ret != 0) 888 pr_err("probe failed for device %s (%d)\n", 889 dev_name(child_device), ret); 890 891 } else { 892 pr_err("probe not set for driver %s\n", 893 dev_name(child_device)); 894 ret = -ENODEV; 895 } 896 return ret; 897} 898 899/* 900 * vmbus_remove - Remove a vmbus device 901 */ 902static int vmbus_remove(struct device *child_device) 903{ 904 struct hv_driver *drv; 905 struct hv_device *dev = device_to_hv_device(child_device); 906 907 if (child_device->driver) { 908 drv = drv_to_hv_drv(child_device->driver); 909 if (drv->remove) 910 drv->remove(dev); 911 } 912 913 return 0; 914} 915 916 917/* 918 * vmbus_shutdown - Shutdown a vmbus device 919 */ 920static void vmbus_shutdown(struct device *child_device) 921{ 922 struct hv_driver *drv; 923 struct hv_device *dev = device_to_hv_device(child_device); 924 925 926 /* The device may not be attached yet */ 927 if (!child_device->driver) 928 return; 929 930 drv = drv_to_hv_drv(child_device->driver); 931 932 if (drv->shutdown) 933 drv->shutdown(dev); 934} 935 936#ifdef CONFIG_PM_SLEEP 937/* 938 * vmbus_suspend - Suspend a vmbus device 939 */ 940static int vmbus_suspend(struct device *child_device) 941{ 942 struct hv_driver *drv; 943 struct hv_device *dev = device_to_hv_device(child_device); 944 945 /* The device may not be attached yet */ 946 if (!child_device->driver) 947 return 0; 948 949 drv = drv_to_hv_drv(child_device->driver); 950 if (!drv->suspend) 951 return -EOPNOTSUPP; 952 953 return drv->suspend(dev); 954} 955 956/* 957 * vmbus_resume - Resume a vmbus device 958 */ 959static int vmbus_resume(struct device *child_device) 960{ 961 struct hv_driver *drv; 962 struct hv_device *dev = device_to_hv_device(child_device); 963 964 /* The device may not be attached yet */ 965 if (!child_device->driver) 966 return 0; 967 968 drv = drv_to_hv_drv(child_device->driver); 969 if (!drv->resume) 970 return -EOPNOTSUPP; 971 972 return drv->resume(dev); 973} 974#else 975#define vmbus_suspend NULL 976#define vmbus_resume NULL 977#endif /* CONFIG_PM_SLEEP */ 978 979/* 980 * vmbus_device_release - Final callback release of the vmbus child device 981 */ 982static void vmbus_device_release(struct device *device) 983{ 984 struct hv_device *hv_dev = device_to_hv_device(device); 985 struct vmbus_channel *channel = hv_dev->channel; 986 987 hv_debug_rm_dev_dir(hv_dev); 988 989 mutex_lock(&vmbus_connection.channel_mutex); 990 hv_process_channel_removal(channel); 991 mutex_unlock(&vmbus_connection.channel_mutex); 992 kfree(hv_dev); 993} 994 995/* 996 * Note: we must use the "noirq" ops: see the comment before vmbus_bus_pm. 997 * 998 * suspend_noirq/resume_noirq are set to NULL to support Suspend-to-Idle: we 999 * shouldn't suspend the vmbus devices upon Suspend-to-Idle, otherwise there 1000 * is no way to wake up a Generation-2 VM. 1001 * 1002 * The other 4 ops are for hibernation. 1003 */ 1004 1005static const struct dev_pm_ops vmbus_pm = { 1006 .suspend_noirq = NULL, 1007 .resume_noirq = NULL, 1008 .freeze_noirq = vmbus_suspend, 1009 .thaw_noirq = vmbus_resume, 1010 .poweroff_noirq = vmbus_suspend, 1011 .restore_noirq = vmbus_resume, 1012}; 1013 1014/* The one and only one */ 1015static struct bus_type hv_bus = { 1016 .name = "vmbus", 1017 .match = vmbus_match, 1018 .shutdown = vmbus_shutdown, 1019 .remove = vmbus_remove, 1020 .probe = vmbus_probe, 1021 .uevent = vmbus_uevent, 1022 .dev_groups = vmbus_dev_groups, 1023 .drv_groups = vmbus_drv_groups, 1024 .pm = &vmbus_pm, 1025}; 1026 1027struct onmessage_work_context { 1028 struct work_struct work; 1029 struct { 1030 struct hv_message_header header; 1031 u8 payload[]; 1032 } msg; 1033}; 1034 1035static void vmbus_onmessage_work(struct work_struct *work) 1036{ 1037 struct onmessage_work_context *ctx; 1038 1039 /* Do not process messages if we're in DISCONNECTED state */ 1040 if (vmbus_connection.conn_state == DISCONNECTED) 1041 return; 1042 1043 ctx = container_of(work, struct onmessage_work_context, 1044 work); 1045 vmbus_onmessage((struct vmbus_channel_message_header *) 1046 &ctx->msg.payload); 1047 kfree(ctx); 1048} 1049 1050void vmbus_on_msg_dpc(unsigned long data) 1051{ 1052 struct hv_per_cpu_context *hv_cpu = (void *)data; 1053 void *page_addr = hv_cpu->synic_message_page; 1054 struct hv_message *msg = (struct hv_message *)page_addr + 1055 VMBUS_MESSAGE_SINT; 1056 struct vmbus_channel_message_header *hdr; 1057 const struct vmbus_channel_message_table_entry *entry; 1058 struct onmessage_work_context *ctx; 1059 u32 message_type = msg->header.message_type; 1060 1061 /* 1062 * 'enum vmbus_channel_message_type' is supposed to always be 'u32' as 1063 * it is being used in 'struct vmbus_channel_message_header' definition 1064 * which is supposed to match hypervisor ABI. 1065 */ 1066 BUILD_BUG_ON(sizeof(enum vmbus_channel_message_type) != sizeof(u32)); 1067 1068 if (message_type == HVMSG_NONE) 1069 /* no msg */ 1070 return; 1071 1072 hdr = (struct vmbus_channel_message_header *)msg->u.payload; 1073 1074 trace_vmbus_on_msg_dpc(hdr); 1075 1076 if (hdr->msgtype >= CHANNELMSG_COUNT) { 1077 WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype); 1078 goto msg_handled; 1079 } 1080 1081 if (msg->header.payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) { 1082 WARN_ONCE(1, "payload size is too large (%d)\n", 1083 msg->header.payload_size); 1084 goto msg_handled; 1085 } 1086 1087 entry = &channel_message_table[hdr->msgtype]; 1088 1089 if (!entry->message_handler) 1090 goto msg_handled; 1091 1092 if (msg->header.payload_size < entry->min_payload_len) { 1093 WARN_ONCE(1, "message too short: msgtype=%d len=%d\n", 1094 hdr->msgtype, msg->header.payload_size); 1095 goto msg_handled; 1096 } 1097 1098 if (entry->handler_type == VMHT_BLOCKING) { 1099 ctx = kmalloc(sizeof(*ctx) + msg->header.payload_size, 1100 GFP_ATOMIC); 1101 if (ctx == NULL) 1102 return; 1103 1104 INIT_WORK(&ctx->work, vmbus_onmessage_work); 1105 memcpy(&ctx->msg, msg, sizeof(msg->header) + 1106 msg->header.payload_size); 1107 1108 /* 1109 * The host can generate a rescind message while we 1110 * may still be handling the original offer. We deal with 1111 * this condition by relying on the synchronization provided 1112 * by offer_in_progress and by channel_mutex. See also the 1113 * inline comments in vmbus_onoffer_rescind(). 1114 */ 1115 switch (hdr->msgtype) { 1116 case CHANNELMSG_RESCIND_CHANNELOFFER: 1117 /* 1118 * If we are handling the rescind message; 1119 * schedule the work on the global work queue. 1120 * 1121 * The OFFER message and the RESCIND message should 1122 * not be handled by the same serialized work queue, 1123 * because the OFFER handler may call vmbus_open(), 1124 * which tries to open the channel by sending an 1125 * OPEN_CHANNEL message to the host and waits for 1126 * the host's response; however, if the host has 1127 * rescinded the channel before it receives the 1128 * OPEN_CHANNEL message, the host just silently 1129 * ignores the OPEN_CHANNEL message; as a result, 1130 * the guest's OFFER handler hangs for ever, if we 1131 * handle the RESCIND message in the same serialized 1132 * work queue: the RESCIND handler can not start to 1133 * run before the OFFER handler finishes. 1134 */ 1135 schedule_work(&ctx->work); 1136 break; 1137 1138 case CHANNELMSG_OFFERCHANNEL: 1139 /* 1140 * The host sends the offer message of a given channel 1141 * before sending the rescind message of the same 1142 * channel. These messages are sent to the guest's 1143 * connect CPU; the guest then starts processing them 1144 * in the tasklet handler on this CPU: 1145 * 1146 * VMBUS_CONNECT_CPU 1147 * 1148 * [vmbus_on_msg_dpc()] 1149 * atomic_inc() // CHANNELMSG_OFFERCHANNEL 1150 * queue_work() 1151 * ... 1152 * [vmbus_on_msg_dpc()] 1153 * schedule_work() // CHANNELMSG_RESCIND_CHANNELOFFER 1154 * 1155 * We rely on the memory-ordering properties of the 1156 * queue_work() and schedule_work() primitives, which 1157 * guarantee that the atomic increment will be visible 1158 * to the CPUs which will execute the offer & rescind 1159 * works by the time these works will start execution. 1160 */ 1161 atomic_inc(&vmbus_connection.offer_in_progress); 1162 fallthrough; 1163 1164 default: 1165 queue_work(vmbus_connection.work_queue, &ctx->work); 1166 } 1167 } else 1168 entry->message_handler(hdr); 1169 1170msg_handled: 1171 vmbus_signal_eom(msg, message_type); 1172} 1173 1174#ifdef CONFIG_PM_SLEEP 1175/* 1176 * Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for 1177 * hibernation, because hv_sock connections can not persist across hibernation. 1178 */ 1179static void vmbus_force_channel_rescinded(struct vmbus_channel *channel) 1180{ 1181 struct onmessage_work_context *ctx; 1182 struct vmbus_channel_rescind_offer *rescind; 1183 1184 WARN_ON(!is_hvsock_channel(channel)); 1185 1186 /* 1187 * Allocation size is small and the allocation should really not fail, 1188 * otherwise the state of the hv_sock connections ends up in limbo. 1189 */ 1190 ctx = kzalloc(sizeof(*ctx) + sizeof(*rescind), 1191 GFP_KERNEL | __GFP_NOFAIL); 1192 1193 /* 1194 * So far, these are not really used by Linux. Just set them to the 1195 * reasonable values conforming to the definitions of the fields. 1196 */ 1197 ctx->msg.header.message_type = 1; 1198 ctx->msg.header.payload_size = sizeof(*rescind); 1199 1200 /* These values are actually used by Linux. */ 1201 rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.payload; 1202 rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER; 1203 rescind->child_relid = channel->offermsg.child_relid; 1204 1205 INIT_WORK(&ctx->work, vmbus_onmessage_work); 1206 1207 queue_work(vmbus_connection.work_queue, &ctx->work); 1208} 1209#endif /* CONFIG_PM_SLEEP */ 1210 1211/* 1212 * Schedule all channels with events pending 1213 */ 1214static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu) 1215{ 1216 unsigned long *recv_int_page; 1217 u32 maxbits, relid; 1218 1219 if (vmbus_proto_version < VERSION_WIN8) { 1220 maxbits = MAX_NUM_CHANNELS_SUPPORTED; 1221 recv_int_page = vmbus_connection.recv_int_page; 1222 } else { 1223 /* 1224 * When the host is win8 and beyond, the event page 1225 * can be directly checked to get the id of the channel 1226 * that has the interrupt pending. 1227 */ 1228 void *page_addr = hv_cpu->synic_event_page; 1229 union hv_synic_event_flags *event 1230 = (union hv_synic_event_flags *)page_addr + 1231 VMBUS_MESSAGE_SINT; 1232 1233 maxbits = HV_EVENT_FLAGS_COUNT; 1234 recv_int_page = event->flags; 1235 } 1236 1237 if (unlikely(!recv_int_page)) 1238 return; 1239 1240 for_each_set_bit(relid, recv_int_page, maxbits) { 1241 void (*callback_fn)(void *context); 1242 struct vmbus_channel *channel; 1243 1244 if (!sync_test_and_clear_bit(relid, recv_int_page)) 1245 continue; 1246 1247 /* Special case - vmbus channel protocol msg */ 1248 if (relid == 0) 1249 continue; 1250 1251 /* 1252 * Pairs with the kfree_rcu() in vmbus_chan_release(). 1253 * Guarantees that the channel data structure doesn't 1254 * get freed while the channel pointer below is being 1255 * dereferenced. 1256 */ 1257 rcu_read_lock(); 1258 1259 /* Find channel based on relid */ 1260 channel = relid2channel(relid); 1261 if (channel == NULL) 1262 goto sched_unlock_rcu; 1263 1264 if (channel->rescind) 1265 goto sched_unlock_rcu; 1266 1267 /* 1268 * Make sure that the ring buffer data structure doesn't get 1269 * freed while we dereference the ring buffer pointer. Test 1270 * for the channel's onchannel_callback being NULL within a 1271 * sched_lock critical section. See also the inline comments 1272 * in vmbus_reset_channel_cb(). 1273 */ 1274 spin_lock(&channel->sched_lock); 1275 1276 callback_fn = channel->onchannel_callback; 1277 if (unlikely(callback_fn == NULL)) 1278 goto sched_unlock; 1279 1280 trace_vmbus_chan_sched(channel); 1281 1282 ++channel->interrupts; 1283 1284 switch (channel->callback_mode) { 1285 case HV_CALL_ISR: 1286 (*callback_fn)(channel->channel_callback_context); 1287 break; 1288 1289 case HV_CALL_BATCHED: 1290 hv_begin_read(&channel->inbound); 1291 fallthrough; 1292 case HV_CALL_DIRECT: 1293 tasklet_schedule(&channel->callback_event); 1294 } 1295 1296sched_unlock: 1297 spin_unlock(&channel->sched_lock); 1298sched_unlock_rcu: 1299 rcu_read_unlock(); 1300 } 1301} 1302 1303static void vmbus_isr(void) 1304{ 1305 struct hv_per_cpu_context *hv_cpu 1306 = this_cpu_ptr(hv_context.cpu_context); 1307 void *page_addr = hv_cpu->synic_event_page; 1308 struct hv_message *msg; 1309 union hv_synic_event_flags *event; 1310 bool handled = false; 1311 1312 if (unlikely(page_addr == NULL)) 1313 return; 1314 1315 event = (union hv_synic_event_flags *)page_addr + 1316 VMBUS_MESSAGE_SINT; 1317 /* 1318 * Check for events before checking for messages. This is the order 1319 * in which events and messages are checked in Windows guests on 1320 * Hyper-V, and the Windows team suggested we do the same. 1321 */ 1322 1323 if ((vmbus_proto_version == VERSION_WS2008) || 1324 (vmbus_proto_version == VERSION_WIN7)) { 1325 1326 /* Since we are a child, we only need to check bit 0 */ 1327 if (sync_test_and_clear_bit(0, event->flags)) 1328 handled = true; 1329 } else { 1330 /* 1331 * Our host is win8 or above. The signaling mechanism 1332 * has changed and we can directly look at the event page. 1333 * If bit n is set then we have an interrup on the channel 1334 * whose id is n. 1335 */ 1336 handled = true; 1337 } 1338 1339 if (handled) 1340 vmbus_chan_sched(hv_cpu); 1341 1342 page_addr = hv_cpu->synic_message_page; 1343 msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT; 1344 1345 /* Check if there are actual msgs to be processed */ 1346 if (msg->header.message_type != HVMSG_NONE) { 1347 if (msg->header.message_type == HVMSG_TIMER_EXPIRED) { 1348 hv_stimer0_isr(); 1349 vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED); 1350 } else 1351 tasklet_schedule(&hv_cpu->msg_dpc); 1352 } 1353 1354 add_interrupt_randomness(hv_get_vector()); 1355} 1356 1357/* 1358 * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg 1359 * buffer and call into Hyper-V to transfer the data. 1360 */ 1361static void hv_kmsg_dump(struct kmsg_dumper *dumper, 1362 enum kmsg_dump_reason reason) 1363{ 1364 size_t bytes_written; 1365 phys_addr_t panic_pa; 1366 1367 /* We are only interested in panics. */ 1368 if ((reason != KMSG_DUMP_PANIC) || (!sysctl_record_panic_msg)) 1369 return; 1370 1371 panic_pa = virt_to_phys(hv_panic_page); 1372 1373 /* 1374 * Write dump contents to the page. No need to synchronize; panic should 1375 * be single-threaded. 1376 */ 1377 kmsg_dump_get_buffer(dumper, false, hv_panic_page, HV_HYP_PAGE_SIZE, 1378 &bytes_written); 1379 if (bytes_written) 1380 hyperv_report_panic_msg(panic_pa, bytes_written); 1381} 1382 1383static struct kmsg_dumper hv_kmsg_dumper = { 1384 .dump = hv_kmsg_dump, 1385}; 1386 1387static struct ctl_table_header *hv_ctl_table_hdr; 1388 1389/* 1390 * sysctl option to allow the user to control whether kmsg data should be 1391 * reported to Hyper-V on panic. 1392 */ 1393static struct ctl_table hv_ctl_table[] = { 1394 { 1395 .procname = "hyperv_record_panic_msg", 1396 .data = &sysctl_record_panic_msg, 1397 .maxlen = sizeof(int), 1398 .mode = 0644, 1399 .proc_handler = proc_dointvec_minmax, 1400 .extra1 = SYSCTL_ZERO, 1401 .extra2 = SYSCTL_ONE 1402 }, 1403 {} 1404}; 1405 1406static struct ctl_table hv_root_table[] = { 1407 { 1408 .procname = "kernel", 1409 .mode = 0555, 1410 .child = hv_ctl_table 1411 }, 1412 {} 1413}; 1414 1415/* 1416 * vmbus_bus_init -Main vmbus driver initialization routine. 1417 * 1418 * Here, we 1419 * - initialize the vmbus driver context 1420 * - invoke the vmbus hv main init routine 1421 * - retrieve the channel offers 1422 */ 1423static int vmbus_bus_init(void) 1424{ 1425 int ret; 1426 1427 ret = hv_init(); 1428 if (ret != 0) { 1429 pr_err("Unable to initialize the hypervisor - 0x%x\n", ret); 1430 return ret; 1431 } 1432 1433 ret = bus_register(&hv_bus); 1434 if (ret) 1435 return ret; 1436 1437 ret = hv_setup_vmbus_irq(vmbus_irq, vmbus_isr); 1438 if (ret) 1439 goto err_setup; 1440 1441 ret = hv_synic_alloc(); 1442 if (ret) 1443 goto err_alloc; 1444 1445 /* 1446 * Initialize the per-cpu interrupt state and stimer state. 1447 * Then connect to the host. 1448 */ 1449 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online", 1450 hv_synic_init, hv_synic_cleanup); 1451 if (ret < 0) 1452 goto err_cpuhp; 1453 hyperv_cpuhp_online = ret; 1454 1455 ret = vmbus_connect(); 1456 if (ret) 1457 goto err_connect; 1458 1459 /* 1460 * Only register if the crash MSRs are available 1461 */ 1462 if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { 1463 u64 hyperv_crash_ctl; 1464 /* 1465 * Sysctl registration is not fatal, since by default 1466 * reporting is enabled. 1467 */ 1468 hv_ctl_table_hdr = register_sysctl_table(hv_root_table); 1469 if (!hv_ctl_table_hdr) 1470 pr_err("Hyper-V: sysctl table register error"); 1471 1472 /* 1473 * Register for panic kmsg callback only if the right 1474 * capability is supported by the hypervisor. 1475 */ 1476 hv_get_crash_ctl(hyperv_crash_ctl); 1477 if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) { 1478 hv_panic_page = (void *)hv_alloc_hyperv_zeroed_page(); 1479 if (hv_panic_page) { 1480 ret = kmsg_dump_register(&hv_kmsg_dumper); 1481 if (ret) { 1482 pr_err("Hyper-V: kmsg dump register " 1483 "error 0x%x\n", ret); 1484 hv_free_hyperv_page( 1485 (unsigned long)hv_panic_page); 1486 hv_panic_page = NULL; 1487 } 1488 } else 1489 pr_err("Hyper-V: panic message page memory " 1490 "allocation failed"); 1491 } 1492 1493 register_die_notifier(&hyperv_die_block); 1494 } 1495 1496 /* 1497 * Always register the panic notifier because we need to unload 1498 * the VMbus channel connection to prevent any VMbus 1499 * activity after the VM panics. 1500 */ 1501 atomic_notifier_chain_register(&panic_notifier_list, 1502 &hyperv_panic_block); 1503 1504 vmbus_request_offers(); 1505 1506 return 0; 1507 1508err_connect: 1509 cpuhp_remove_state(hyperv_cpuhp_online); 1510err_cpuhp: 1511 hv_synic_free(); 1512err_alloc: 1513 hv_remove_vmbus_irq(); 1514err_setup: 1515 bus_unregister(&hv_bus); 1516 unregister_sysctl_table(hv_ctl_table_hdr); 1517 hv_ctl_table_hdr = NULL; 1518 return ret; 1519} 1520 1521/** 1522 * __vmbus_child_driver_register() - Register a vmbus's driver 1523 * @hv_driver: Pointer to driver structure you want to register 1524 * @owner: owner module of the drv 1525 * @mod_name: module name string 1526 * 1527 * Registers the given driver with Linux through the 'driver_register()' call 1528 * and sets up the hyper-v vmbus handling for this driver. 1529 * It will return the state of the 'driver_register()' call. 1530 * 1531 */ 1532int __vmbus_driver_register(struct hv_driver *hv_driver, struct module *owner, const char *mod_name) 1533{ 1534 int ret; 1535 1536 pr_info("registering driver %s\n", hv_driver->name); 1537 1538 ret = vmbus_exists(); 1539 if (ret < 0) 1540 return ret; 1541 1542 hv_driver->driver.name = hv_driver->name; 1543 hv_driver->driver.owner = owner; 1544 hv_driver->driver.mod_name = mod_name; 1545 hv_driver->driver.bus = &hv_bus; 1546 1547 spin_lock_init(&hv_driver->dynids.lock); 1548 INIT_LIST_HEAD(&hv_driver->dynids.list); 1549 1550 ret = driver_register(&hv_driver->driver); 1551 1552 return ret; 1553} 1554EXPORT_SYMBOL_GPL(__vmbus_driver_register); 1555 1556/** 1557 * vmbus_driver_unregister() - Unregister a vmbus's driver 1558 * @hv_driver: Pointer to driver structure you want to 1559 * un-register 1560 * 1561 * Un-register the given driver that was previous registered with a call to 1562 * vmbus_driver_register() 1563 */ 1564void vmbus_driver_unregister(struct hv_driver *hv_driver) 1565{ 1566 pr_info("unregistering driver %s\n", hv_driver->name); 1567 1568 if (!vmbus_exists()) { 1569 driver_unregister(&hv_driver->driver); 1570 vmbus_free_dynids(hv_driver); 1571 } 1572} 1573EXPORT_SYMBOL_GPL(vmbus_driver_unregister); 1574 1575 1576/* 1577 * Called when last reference to channel is gone. 1578 */ 1579static void vmbus_chan_release(struct kobject *kobj) 1580{ 1581 struct vmbus_channel *channel 1582 = container_of(kobj, struct vmbus_channel, kobj); 1583 1584 kfree_rcu(channel, rcu); 1585} 1586 1587struct vmbus_chan_attribute { 1588 struct attribute attr; 1589 ssize_t (*show)(struct vmbus_channel *chan, char *buf); 1590 ssize_t (*store)(struct vmbus_channel *chan, 1591 const char *buf, size_t count); 1592}; 1593#define VMBUS_CHAN_ATTR(_name, _mode, _show, _store) \ 1594 struct vmbus_chan_attribute chan_attr_##_name \ 1595 = __ATTR(_name, _mode, _show, _store) 1596#define VMBUS_CHAN_ATTR_RW(_name) \ 1597 struct vmbus_chan_attribute chan_attr_##_name = __ATTR_RW(_name) 1598#define VMBUS_CHAN_ATTR_RO(_name) \ 1599 struct vmbus_chan_attribute chan_attr_##_name = __ATTR_RO(_name) 1600#define VMBUS_CHAN_ATTR_WO(_name) \ 1601 struct vmbus_chan_attribute chan_attr_##_name = __ATTR_WO(_name) 1602 1603static ssize_t vmbus_chan_attr_show(struct kobject *kobj, 1604 struct attribute *attr, char *buf) 1605{ 1606 const struct vmbus_chan_attribute *attribute 1607 = container_of(attr, struct vmbus_chan_attribute, attr); 1608 struct vmbus_channel *chan 1609 = container_of(kobj, struct vmbus_channel, kobj); 1610 1611 if (!attribute->show) 1612 return -EIO; 1613 1614 return attribute->show(chan, buf); 1615} 1616 1617static ssize_t vmbus_chan_attr_store(struct kobject *kobj, 1618 struct attribute *attr, const char *buf, 1619 size_t count) 1620{ 1621 const struct vmbus_chan_attribute *attribute 1622 = container_of(attr, struct vmbus_chan_attribute, attr); 1623 struct vmbus_channel *chan 1624 = container_of(kobj, struct vmbus_channel, kobj); 1625 1626 if (!attribute->store) 1627 return -EIO; 1628 1629 return attribute->store(chan, buf, count); 1630} 1631 1632static const struct sysfs_ops vmbus_chan_sysfs_ops = { 1633 .show = vmbus_chan_attr_show, 1634 .store = vmbus_chan_attr_store, 1635}; 1636 1637static ssize_t out_mask_show(struct vmbus_channel *channel, char *buf) 1638{ 1639 struct hv_ring_buffer_info *rbi = &channel->outbound; 1640 ssize_t ret; 1641 1642 mutex_lock(&rbi->ring_buffer_mutex); 1643 if (!rbi->ring_buffer) { 1644 mutex_unlock(&rbi->ring_buffer_mutex); 1645 return -EINVAL; 1646 } 1647 1648 ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask); 1649 mutex_unlock(&rbi->ring_buffer_mutex); 1650 return ret; 1651} 1652static VMBUS_CHAN_ATTR_RO(out_mask); 1653 1654static ssize_t in_mask_show(struct vmbus_channel *channel, char *buf) 1655{ 1656 struct hv_ring_buffer_info *rbi = &channel->inbound; 1657 ssize_t ret; 1658 1659 mutex_lock(&rbi->ring_buffer_mutex); 1660 if (!rbi->ring_buffer) { 1661 mutex_unlock(&rbi->ring_buffer_mutex); 1662 return -EINVAL; 1663 } 1664 1665 ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask); 1666 mutex_unlock(&rbi->ring_buffer_mutex); 1667 return ret; 1668} 1669static VMBUS_CHAN_ATTR_RO(in_mask); 1670 1671static ssize_t read_avail_show(struct vmbus_channel *channel, char *buf) 1672{ 1673 struct hv_ring_buffer_info *rbi = &channel->inbound; 1674 ssize_t ret; 1675 1676 mutex_lock(&rbi->ring_buffer_mutex); 1677 if (!rbi->ring_buffer) { 1678 mutex_unlock(&rbi->ring_buffer_mutex); 1679 return -EINVAL; 1680 } 1681 1682 ret = sprintf(buf, "%u\n", hv_get_bytes_to_read(rbi)); 1683 mutex_unlock(&rbi->ring_buffer_mutex); 1684 return ret; 1685} 1686static VMBUS_CHAN_ATTR_RO(read_avail); 1687 1688static ssize_t write_avail_show(struct vmbus_channel *channel, char *buf) 1689{ 1690 struct hv_ring_buffer_info *rbi = &channel->outbound; 1691 ssize_t ret; 1692 1693 mutex_lock(&rbi->ring_buffer_mutex); 1694 if (!rbi->ring_buffer) { 1695 mutex_unlock(&rbi->ring_buffer_mutex); 1696 return -EINVAL; 1697 } 1698 1699 ret = sprintf(buf, "%u\n", hv_get_bytes_to_write(rbi)); 1700 mutex_unlock(&rbi->ring_buffer_mutex); 1701 return ret; 1702} 1703static VMBUS_CHAN_ATTR_RO(write_avail); 1704 1705static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf) 1706{ 1707 return sprintf(buf, "%u\n", channel->target_cpu); 1708} 1709static ssize_t target_cpu_store(struct vmbus_channel *channel, 1710 const char *buf, size_t count) 1711{ 1712 u32 target_cpu, origin_cpu; 1713 ssize_t ret = count; 1714 1715 if (vmbus_proto_version < VERSION_WIN10_V4_1) 1716 return -EIO; 1717 1718 if (sscanf(buf, "%uu", &target_cpu) != 1) 1719 return -EIO; 1720 1721 /* Validate target_cpu for the cpumask_test_cpu() operation below. */ 1722 if (target_cpu >= nr_cpumask_bits) 1723 return -EINVAL; 1724 1725 /* No CPUs should come up or down during this. */ 1726 cpus_read_lock(); 1727 1728 if (!cpu_online(target_cpu)) { 1729 cpus_read_unlock(); 1730 return -EINVAL; 1731 } 1732 1733 /* 1734 * Synchronizes target_cpu_store() and channel closure: 1735 * 1736 * { Initially: state = CHANNEL_OPENED } 1737 * 1738 * CPU1 CPU2 1739 * 1740 * [target_cpu_store()] [vmbus_disconnect_ring()] 1741 * 1742 * LOCK channel_mutex LOCK channel_mutex 1743 * LOAD r1 = state LOAD r2 = state 1744 * IF (r1 == CHANNEL_OPENED) IF (r2 == CHANNEL_OPENED) 1745 * SEND MODIFYCHANNEL STORE state = CHANNEL_OPEN 1746 * [...] SEND CLOSECHANNEL 1747 * UNLOCK channel_mutex UNLOCK channel_mutex 1748 * 1749 * Forbids: r1 == r2 == CHANNEL_OPENED (i.e., CPU1's LOCK precedes 1750 * CPU2's LOCK) && CPU2's SEND precedes CPU1's SEND 1751 * 1752 * Note. The host processes the channel messages "sequentially", in 1753 * the order in which they are received on a per-partition basis. 1754 */ 1755 mutex_lock(&vmbus_connection.channel_mutex); 1756 1757 /* 1758 * Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels; 1759 * avoid sending the message and fail here for such channels. 1760 */ 1761 if (channel->state != CHANNEL_OPENED_STATE) { 1762 ret = -EIO; 1763 goto cpu_store_unlock; 1764 } 1765 1766 origin_cpu = channel->target_cpu; 1767 if (target_cpu == origin_cpu) 1768 goto cpu_store_unlock; 1769 1770 if (vmbus_send_modifychannel(channel->offermsg.child_relid, 1771 hv_cpu_number_to_vp_number(target_cpu))) { 1772 ret = -EIO; 1773 goto cpu_store_unlock; 1774 } 1775 1776 /* 1777 * Warning. At this point, there is *no* guarantee that the host will 1778 * have successfully processed the vmbus_send_modifychannel() request. 1779 * See the header comment of vmbus_send_modifychannel() for more info. 1780 * 1781 * Lags in the processing of the above vmbus_send_modifychannel() can 1782 * result in missed interrupts if the "old" target CPU is taken offline 1783 * before Hyper-V starts sending interrupts to the "new" target CPU. 1784 * But apart from this offlining scenario, the code tolerates such 1785 * lags. It will function correctly even if a channel interrupt comes 1786 * in on a CPU that is different from the channel target_cpu value. 1787 */ 1788 1789 channel->target_cpu = target_cpu; 1790 1791 /* See init_vp_index(). */ 1792 if (hv_is_perf_channel(channel)) 1793 hv_update_alloced_cpus(origin_cpu, target_cpu); 1794 1795 /* Currently set only for storvsc channels. */ 1796 if (channel->change_target_cpu_callback) { 1797 (*channel->change_target_cpu_callback)(channel, 1798 origin_cpu, target_cpu); 1799 } 1800 1801cpu_store_unlock: 1802 mutex_unlock(&vmbus_connection.channel_mutex); 1803 cpus_read_unlock(); 1804 return ret; 1805} 1806static VMBUS_CHAN_ATTR(cpu, 0644, target_cpu_show, target_cpu_store); 1807 1808static ssize_t channel_pending_show(struct vmbus_channel *channel, 1809 char *buf) 1810{ 1811 return sprintf(buf, "%d\n", 1812 channel_pending(channel, 1813 vmbus_connection.monitor_pages[1])); 1814} 1815static VMBUS_CHAN_ATTR(pending, S_IRUGO, channel_pending_show, NULL); 1816 1817static ssize_t channel_latency_show(struct vmbus_channel *channel, 1818 char *buf) 1819{ 1820 return sprintf(buf, "%d\n", 1821 channel_latency(channel, 1822 vmbus_connection.monitor_pages[1])); 1823} 1824static VMBUS_CHAN_ATTR(latency, S_IRUGO, channel_latency_show, NULL); 1825 1826static ssize_t channel_interrupts_show(struct vmbus_channel *channel, char *buf) 1827{ 1828 return sprintf(buf, "%llu\n", channel->interrupts); 1829} 1830static VMBUS_CHAN_ATTR(interrupts, S_IRUGO, channel_interrupts_show, NULL); 1831 1832static ssize_t channel_events_show(struct vmbus_channel *channel, char *buf) 1833{ 1834 return sprintf(buf, "%llu\n", channel->sig_events); 1835} 1836static VMBUS_CHAN_ATTR(events, S_IRUGO, channel_events_show, NULL); 1837 1838static ssize_t channel_intr_in_full_show(struct vmbus_channel *channel, 1839 char *buf) 1840{ 1841 return sprintf(buf, "%llu\n", 1842 (unsigned long long)channel->intr_in_full); 1843} 1844static VMBUS_CHAN_ATTR(intr_in_full, 0444, channel_intr_in_full_show, NULL); 1845 1846static ssize_t channel_intr_out_empty_show(struct vmbus_channel *channel, 1847 char *buf) 1848{ 1849 return sprintf(buf, "%llu\n", 1850 (unsigned long long)channel->intr_out_empty); 1851} 1852static VMBUS_CHAN_ATTR(intr_out_empty, 0444, channel_intr_out_empty_show, NULL); 1853 1854static ssize_t channel_out_full_first_show(struct vmbus_channel *channel, 1855 char *buf) 1856{ 1857 return sprintf(buf, "%llu\n", 1858 (unsigned long long)channel->out_full_first); 1859} 1860static VMBUS_CHAN_ATTR(out_full_first, 0444, channel_out_full_first_show, NULL); 1861 1862static ssize_t channel_out_full_total_show(struct vmbus_channel *channel, 1863 char *buf) 1864{ 1865 return sprintf(buf, "%llu\n", 1866 (unsigned long long)channel->out_full_total); 1867} 1868static VMBUS_CHAN_ATTR(out_full_total, 0444, channel_out_full_total_show, NULL); 1869 1870static ssize_t subchannel_monitor_id_show(struct vmbus_channel *channel, 1871 char *buf) 1872{ 1873 return sprintf(buf, "%u\n", channel->offermsg.monitorid); 1874} 1875static VMBUS_CHAN_ATTR(monitor_id, S_IRUGO, subchannel_monitor_id_show, NULL); 1876 1877static ssize_t subchannel_id_show(struct vmbus_channel *channel, 1878 char *buf) 1879{ 1880 return sprintf(buf, "%u\n", 1881 channel->offermsg.offer.sub_channel_index); 1882} 1883static VMBUS_CHAN_ATTR_RO(subchannel_id); 1884 1885static struct attribute *vmbus_chan_attrs[] = { 1886 &chan_attr_out_mask.attr, 1887 &chan_attr_in_mask.attr, 1888 &chan_attr_read_avail.attr, 1889 &chan_attr_write_avail.attr, 1890 &chan_attr_cpu.attr, 1891 &chan_attr_pending.attr, 1892 &chan_attr_latency.attr, 1893 &chan_attr_interrupts.attr, 1894 &chan_attr_events.attr, 1895 &chan_attr_intr_in_full.attr, 1896 &chan_attr_intr_out_empty.attr, 1897 &chan_attr_out_full_first.attr, 1898 &chan_attr_out_full_total.attr, 1899 &chan_attr_monitor_id.attr, 1900 &chan_attr_subchannel_id.attr, 1901 NULL 1902}; 1903 1904/* 1905 * Channel-level attribute_group callback function. Returns the permission for 1906 * each attribute, and returns 0 if an attribute is not visible. 1907 */ 1908static umode_t vmbus_chan_attr_is_visible(struct kobject *kobj, 1909 struct attribute *attr, int idx) 1910{ 1911 const struct vmbus_channel *channel = 1912 container_of(kobj, struct vmbus_channel, kobj); 1913 1914 /* Hide the monitor attributes if the monitor mechanism is not used. */ 1915 if (!channel->offermsg.monitor_allocated && 1916 (attr == &chan_attr_pending.attr || 1917 attr == &chan_attr_latency.attr || 1918 attr == &chan_attr_monitor_id.attr)) 1919 return 0; 1920 1921 return attr->mode; 1922} 1923 1924static struct attribute_group vmbus_chan_group = { 1925 .attrs = vmbus_chan_attrs, 1926 .is_visible = vmbus_chan_attr_is_visible 1927}; 1928 1929static struct kobj_type vmbus_chan_ktype = { 1930 .sysfs_ops = &vmbus_chan_sysfs_ops, 1931 .release = vmbus_chan_release, 1932}; 1933 1934/* 1935 * vmbus_add_channel_kobj - setup a sub-directory under device/channels 1936 */ 1937int vmbus_add_channel_kobj(struct hv_device *dev, struct vmbus_channel *channel) 1938{ 1939 const struct device *device = &dev->device; 1940 struct kobject *kobj = &channel->kobj; 1941 u32 relid = channel->offermsg.child_relid; 1942 int ret; 1943 1944 kobj->kset = dev->channels_kset; 1945 ret = kobject_init_and_add(kobj, &vmbus_chan_ktype, NULL, 1946 "%u", relid); 1947 if (ret) { 1948 kobject_put(kobj); 1949 return ret; 1950 } 1951 1952 ret = sysfs_create_group(kobj, &vmbus_chan_group); 1953 1954 if (ret) { 1955 /* 1956 * The calling functions' error handling paths will cleanup the 1957 * empty channel directory. 1958 */ 1959 kobject_put(kobj); 1960 dev_err(device, "Unable to set up channel sysfs files\n"); 1961 return ret; 1962 } 1963 1964 kobject_uevent(kobj, KOBJ_ADD); 1965 1966 return 0; 1967} 1968 1969/* 1970 * vmbus_remove_channel_attr_group - remove the channel's attribute group 1971 */ 1972void vmbus_remove_channel_attr_group(struct vmbus_channel *channel) 1973{ 1974 sysfs_remove_group(&channel->kobj, &vmbus_chan_group); 1975} 1976 1977/* 1978 * vmbus_device_create - Creates and registers a new child device 1979 * on the vmbus. 1980 */ 1981struct hv_device *vmbus_device_create(const guid_t *type, 1982 const guid_t *instance, 1983 struct vmbus_channel *channel) 1984{ 1985 struct hv_device *child_device_obj; 1986 1987 child_device_obj = kzalloc(sizeof(struct hv_device), GFP_KERNEL); 1988 if (!child_device_obj) { 1989 pr_err("Unable to allocate device object for child device\n"); 1990 return NULL; 1991 } 1992 1993 child_device_obj->channel = channel; 1994 guid_copy(&child_device_obj->dev_type, type); 1995 guid_copy(&child_device_obj->dev_instance, instance); 1996 child_device_obj->vendor_id = 0x1414; /* MSFT vendor ID */ 1997 1998 return child_device_obj; 1999} 2000 2001/* 2002 * vmbus_device_register - Register the child device 2003 */ 2004int vmbus_device_register(struct hv_device *child_device_obj) 2005{ 2006 struct kobject *kobj = &child_device_obj->device.kobj; 2007 int ret; 2008 2009 dev_set_name(&child_device_obj->device, "%pUl", 2010 &child_device_obj->channel->offermsg.offer.if_instance); 2011 2012 child_device_obj->device.bus = &hv_bus; 2013 child_device_obj->device.parent = &hv_acpi_dev->dev; 2014 child_device_obj->device.release = vmbus_device_release; 2015 2016 /* 2017 * Register with the LDM. This will kick off the driver/device 2018 * binding...which will eventually call vmbus_match() and vmbus_probe() 2019 */ 2020 ret = device_register(&child_device_obj->device); 2021 if (ret) { 2022 pr_err("Unable to register child device\n"); 2023 put_device(&child_device_obj->device); 2024 return ret; 2025 } 2026 2027 child_device_obj->channels_kset = kset_create_and_add("channels", 2028 NULL, kobj); 2029 if (!child_device_obj->channels_kset) { 2030 ret = -ENOMEM; 2031 goto err_dev_unregister; 2032 } 2033 2034 ret = vmbus_add_channel_kobj(child_device_obj, 2035 child_device_obj->channel); 2036 if (ret) { 2037 pr_err("Unable to register primary channeln"); 2038 goto err_kset_unregister; 2039 } 2040 hv_debug_add_dev_dir(child_device_obj); 2041 2042 return 0; 2043 2044err_kset_unregister: 2045 kset_unregister(child_device_obj->channels_kset); 2046 2047err_dev_unregister: 2048 device_unregister(&child_device_obj->device); 2049 return ret; 2050} 2051 2052/* 2053 * vmbus_device_unregister - Remove the specified child device 2054 * from the vmbus. 2055 */ 2056void vmbus_device_unregister(struct hv_device *device_obj) 2057{ 2058 pr_debug("child device %s unregistered\n", 2059 dev_name(&device_obj->device)); 2060 2061 kset_unregister(device_obj->channels_kset); 2062 2063 /* 2064 * Kick off the process of unregistering the device. 2065 * This will call vmbus_remove() and eventually vmbus_device_release() 2066 */ 2067 device_unregister(&device_obj->device); 2068} 2069 2070 2071/* 2072 * VMBUS is an acpi enumerated device. Get the information we 2073 * need from DSDT. 2074 */ 2075#define VTPM_BASE_ADDRESS 0xfed40000 2076static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx) 2077{ 2078 resource_size_t start = 0; 2079 resource_size_t end = 0; 2080 struct resource *new_res; 2081 struct resource **old_res = &hyperv_mmio; 2082 struct resource **prev_res = NULL; 2083 struct resource r; 2084 2085 switch (res->type) { 2086 2087 /* 2088 * "Address" descriptors are for bus windows. Ignore 2089 * "memory" descriptors, which are for registers on 2090 * devices. 2091 */ 2092 case ACPI_RESOURCE_TYPE_ADDRESS32: 2093 start = res->data.address32.address.minimum; 2094 end = res->data.address32.address.maximum; 2095 break; 2096 2097 case ACPI_RESOURCE_TYPE_ADDRESS64: 2098 start = res->data.address64.address.minimum; 2099 end = res->data.address64.address.maximum; 2100 break; 2101 2102 /* 2103 * The IRQ information is needed only on ARM64, which Hyper-V 2104 * sets up in the extended format. IRQ information is present 2105 * on x86/x64 in the non-extended format but it is not used by 2106 * Linux. So don't bother checking for the non-extended format. 2107 */ 2108 case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: 2109 if (!acpi_dev_resource_interrupt(res, 0, &r)) { 2110 pr_err("Unable to parse Hyper-V ACPI interrupt\n"); 2111 return AE_ERROR; 2112 } 2113 /* ARM64 INTID for VMbus */ 2114 vmbus_interrupt = res->data.extended_irq.interrupts[0]; 2115 /* Linux IRQ number */ 2116 vmbus_irq = r.start; 2117 return AE_OK; 2118 2119 default: 2120 /* Unused resource type */ 2121 return AE_OK; 2122 2123 } 2124 /* 2125 * Ignore ranges that are below 1MB, as they're not 2126 * necessary or useful here. 2127 */ 2128 if (end < 0x100000) 2129 return AE_OK; 2130 2131 new_res = kzalloc(sizeof(*new_res), GFP_ATOMIC); 2132 if (!new_res) 2133 return AE_NO_MEMORY; 2134 2135 /* If this range overlaps the virtual TPM, truncate it. */ 2136 if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS) 2137 end = VTPM_BASE_ADDRESS; 2138 2139 new_res->name = "hyperv mmio"; 2140 new_res->flags = IORESOURCE_MEM; 2141 new_res->start = start; 2142 new_res->end = end; 2143 2144 /* 2145 * If two ranges are adjacent, merge them. 2146 */ 2147 do { 2148 if (!*old_res) { 2149 *old_res = new_res; 2150 break; 2151 } 2152 2153 if (((*old_res)->end + 1) == new_res->start) { 2154 (*old_res)->end = new_res->end; 2155 kfree(new_res); 2156 break; 2157 } 2158 2159 if ((*old_res)->start == new_res->end + 1) { 2160 (*old_res)->start = new_res->start; 2161 kfree(new_res); 2162 break; 2163 } 2164 2165 if ((*old_res)->start > new_res->end) { 2166 new_res->sibling = *old_res; 2167 if (prev_res) 2168 (*prev_res)->sibling = new_res; 2169 *old_res = new_res; 2170 break; 2171 } 2172 2173 prev_res = old_res; 2174 old_res = &(*old_res)->sibling; 2175 2176 } while (1); 2177 2178 return AE_OK; 2179} 2180 2181static int vmbus_acpi_remove(struct acpi_device *device) 2182{ 2183 struct resource *cur_res; 2184 struct resource *next_res; 2185 2186 if (hyperv_mmio) { 2187 if (fb_mmio) { 2188 __release_region(hyperv_mmio, fb_mmio->start, 2189 resource_size(fb_mmio)); 2190 fb_mmio = NULL; 2191 } 2192 2193 for (cur_res = hyperv_mmio; cur_res; cur_res = next_res) { 2194 next_res = cur_res->sibling; 2195 kfree(cur_res); 2196 } 2197 } 2198 2199 return 0; 2200} 2201 2202static void vmbus_reserve_fb(void) 2203{ 2204 int size; 2205 /* 2206 * Make a claim for the frame buffer in the resource tree under the 2207 * first node, which will be the one below 4GB. The length seems to 2208 * be underreported, particularly in a Generation 1 VM. So start out 2209 * reserving a larger area and make it smaller until it succeeds. 2210 */ 2211 2212 if (screen_info.lfb_base) { 2213 if (efi_enabled(EFI_BOOT)) 2214 size = max_t(__u32, screen_info.lfb_size, 0x800000); 2215 else 2216 size = max_t(__u32, screen_info.lfb_size, 0x4000000); 2217 2218 for (; !fb_mmio && (size >= 0x100000); size >>= 1) { 2219 fb_mmio = __request_region(hyperv_mmio, 2220 screen_info.lfb_base, size, 2221 fb_mmio_name, 0); 2222 } 2223 } 2224} 2225 2226/** 2227 * vmbus_allocate_mmio() - Pick a memory-mapped I/O range. 2228 * @new: If successful, supplied a pointer to the 2229 * allocated MMIO space. 2230 * @device_obj: Identifies the caller 2231 * @min: Minimum guest physical address of the 2232 * allocation 2233 * @max: Maximum guest physical address 2234 * @size: Size of the range to be allocated 2235 * @align: Alignment of the range to be allocated 2236 * @fb_overlap_ok: Whether this allocation can be allowed 2237 * to overlap the video frame buffer. 2238 * 2239 * This function walks the resources granted to VMBus by the 2240 * _CRS object in the ACPI namespace underneath the parent 2241 * "bridge" whether that's a root PCI bus in the Generation 1 2242 * case or a Module Device in the Generation 2 case. It then 2243 * attempts to allocate from the global MMIO pool in a way that 2244 * matches the constraints supplied in these parameters and by 2245 * that _CRS. 2246 * 2247 * Return: 0 on success, -errno on failure 2248 */ 2249int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, 2250 resource_size_t min, resource_size_t max, 2251 resource_size_t size, resource_size_t align, 2252 bool fb_overlap_ok) 2253{ 2254 struct resource *iter, *shadow; 2255 resource_size_t range_min, range_max, start, end; 2256 const char *dev_n = dev_name(&device_obj->device); 2257 int retval; 2258 2259 retval = -ENXIO; 2260 mutex_lock(&hyperv_mmio_lock); 2261 2262 /* 2263 * If overlaps with frame buffers are allowed, then first attempt to 2264 * make the allocation from within the reserved region. Because it 2265 * is already reserved, no shadow allocation is necessary. 2266 */ 2267 if (fb_overlap_ok && fb_mmio && !(min > fb_mmio->end) && 2268 !(max < fb_mmio->start)) { 2269 2270 range_min = fb_mmio->start; 2271 range_max = fb_mmio->end; 2272 start = (range_min + align - 1) & ~(align - 1); 2273 for (; start + size - 1 <= range_max; start += align) { 2274 *new = request_mem_region_exclusive(start, size, dev_n); 2275 if (*new) { 2276 retval = 0; 2277 goto exit; 2278 } 2279 } 2280 } 2281 2282 for (iter = hyperv_mmio; iter; iter = iter->sibling) { 2283 if ((iter->start >= max) || (iter->end <= min)) 2284 continue; 2285 2286 range_min = iter->start; 2287 range_max = iter->end; 2288 start = (range_min + align - 1) & ~(align - 1); 2289 for (; start + size - 1 <= range_max; start += align) { 2290 end = start + size - 1; 2291 2292 /* Skip the whole fb_mmio region if not fb_overlap_ok */ 2293 if (!fb_overlap_ok && fb_mmio && 2294 (((start >= fb_mmio->start) && (start <= fb_mmio->end)) || 2295 ((end >= fb_mmio->start) && (end <= fb_mmio->end)))) 2296 continue; 2297 2298 shadow = __request_region(iter, start, size, NULL, 2299 IORESOURCE_BUSY); 2300 if (!shadow) 2301 continue; 2302 2303 *new = request_mem_region_exclusive(start, size, dev_n); 2304 if (*new) { 2305 shadow->name = (char *)*new; 2306 retval = 0; 2307 goto exit; 2308 } 2309 2310 __release_region(iter, start, size); 2311 } 2312 } 2313 2314exit: 2315 mutex_unlock(&hyperv_mmio_lock); 2316 return retval; 2317} 2318EXPORT_SYMBOL_GPL(vmbus_allocate_mmio); 2319 2320/** 2321 * vmbus_free_mmio() - Free a memory-mapped I/O range. 2322 * @start: Base address of region to release. 2323 * @size: Size of the range to be allocated 2324 * 2325 * This function releases anything requested by 2326 * vmbus_mmio_allocate(). 2327 */ 2328void vmbus_free_mmio(resource_size_t start, resource_size_t size) 2329{ 2330 struct resource *iter; 2331 2332 mutex_lock(&hyperv_mmio_lock); 2333 for (iter = hyperv_mmio; iter; iter = iter->sibling) { 2334 if ((iter->start >= start + size) || (iter->end <= start)) 2335 continue; 2336 2337 __release_region(iter, start, size); 2338 } 2339 release_mem_region(start, size); 2340 mutex_unlock(&hyperv_mmio_lock); 2341 2342} 2343EXPORT_SYMBOL_GPL(vmbus_free_mmio); 2344 2345static int vmbus_acpi_add(struct acpi_device *device) 2346{ 2347 acpi_status result; 2348 int ret_val = -ENODEV; 2349 struct acpi_device *ancestor; 2350 2351 hv_acpi_dev = device; 2352 2353 result = acpi_walk_resources(device->handle, METHOD_NAME__CRS, 2354 vmbus_walk_resources, NULL); 2355 2356 if (ACPI_FAILURE(result)) 2357 goto acpi_walk_err; 2358 /* 2359 * Some ancestor of the vmbus acpi device (Gen1 or Gen2 2360 * firmware) is the VMOD that has the mmio ranges. Get that. 2361 */ 2362 for (ancestor = device->parent; ancestor; ancestor = ancestor->parent) { 2363 result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS, 2364 vmbus_walk_resources, NULL); 2365 2366 if (ACPI_FAILURE(result)) 2367 continue; 2368 if (hyperv_mmio) { 2369 vmbus_reserve_fb(); 2370 break; 2371 } 2372 } 2373 ret_val = 0; 2374 2375acpi_walk_err: 2376 complete(&probe_event); 2377 if (ret_val) 2378 vmbus_acpi_remove(device); 2379 return ret_val; 2380} 2381 2382#ifdef CONFIG_PM_SLEEP 2383static int vmbus_bus_suspend(struct device *dev) 2384{ 2385 struct vmbus_channel *channel, *sc; 2386 2387 while (atomic_read(&vmbus_connection.offer_in_progress) != 0) { 2388 /* 2389 * We wait here until the completion of any channel 2390 * offers that are currently in progress. 2391 */ 2392 msleep(1); 2393 } 2394 2395 mutex_lock(&vmbus_connection.channel_mutex); 2396 list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { 2397 if (!is_hvsock_channel(channel)) 2398 continue; 2399 2400 vmbus_force_channel_rescinded(channel); 2401 } 2402 mutex_unlock(&vmbus_connection.channel_mutex); 2403 2404 /* 2405 * Wait until all the sub-channels and hv_sock channels have been 2406 * cleaned up. Sub-channels should be destroyed upon suspend, otherwise 2407 * they would conflict with the new sub-channels that will be created 2408 * in the resume path. hv_sock channels should also be destroyed, but 2409 * a hv_sock channel of an established hv_sock connection can not be 2410 * really destroyed since it may still be referenced by the userspace 2411 * application, so we just force the hv_sock channel to be rescinded 2412 * by vmbus_force_channel_rescinded(), and the userspace application 2413 * will thoroughly destroy the channel after hibernation. 2414 * 2415 * Note: the counter nr_chan_close_on_suspend may never go above 0 if 2416 * the VM has no sub-channel and hv_sock channel, e.g. a 1-vCPU VM. 2417 */ 2418 if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0) 2419 wait_for_completion(&vmbus_connection.ready_for_suspend_event); 2420 2421 if (atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0) { 2422 pr_err("Can not suspend due to a previous failed resuming\n"); 2423 return -EBUSY; 2424 } 2425 2426 mutex_lock(&vmbus_connection.channel_mutex); 2427 2428 list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { 2429 /* 2430 * Remove the channel from the array of channels and invalidate 2431 * the channel's relid. Upon resume, vmbus_onoffer() will fix 2432 * up the relid (and other fields, if necessary) and add the 2433 * channel back to the array. 2434 */ 2435 vmbus_channel_unmap_relid(channel); 2436 channel->offermsg.child_relid = INVALID_RELID; 2437 2438 if (is_hvsock_channel(channel)) { 2439 if (!channel->rescind) { 2440 pr_err("hv_sock channel not rescinded!\n"); 2441 WARN_ON_ONCE(1); 2442 } 2443 continue; 2444 } 2445 2446 list_for_each_entry(sc, &channel->sc_list, sc_list) { 2447 pr_err("Sub-channel not deleted!\n"); 2448 WARN_ON_ONCE(1); 2449 } 2450 2451 atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume); 2452 } 2453 2454 mutex_unlock(&vmbus_connection.channel_mutex); 2455 2456 vmbus_initiate_unload(false); 2457 2458 /* Reset the event for the next resume. */ 2459 reinit_completion(&vmbus_connection.ready_for_resume_event); 2460 2461 return 0; 2462} 2463 2464static int vmbus_bus_resume(struct device *dev) 2465{ 2466 struct vmbus_channel_msginfo *msginfo; 2467 size_t msgsize; 2468 int ret; 2469 2470 /* 2471 * We only use the 'vmbus_proto_version', which was in use before 2472 * hibernation, to re-negotiate with the host. 2473 */ 2474 if (!vmbus_proto_version) { 2475 pr_err("Invalid proto version = 0x%x\n", vmbus_proto_version); 2476 return -EINVAL; 2477 } 2478 2479 msgsize = sizeof(*msginfo) + 2480 sizeof(struct vmbus_channel_initiate_contact); 2481 2482 msginfo = kzalloc(msgsize, GFP_KERNEL); 2483 2484 if (msginfo == NULL) 2485 return -ENOMEM; 2486 2487 ret = vmbus_negotiate_version(msginfo, vmbus_proto_version); 2488 2489 kfree(msginfo); 2490 2491 if (ret != 0) 2492 return ret; 2493 2494 WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0); 2495 2496 vmbus_request_offers(); 2497 2498 if (wait_for_completion_timeout( 2499 &vmbus_connection.ready_for_resume_event, 10 * HZ) == 0) 2500 pr_err("Some vmbus device is missing after suspending?\n"); 2501 2502 /* Reset the event for the next suspend. */ 2503 reinit_completion(&vmbus_connection.ready_for_suspend_event); 2504 2505 return 0; 2506} 2507#else 2508#define vmbus_bus_suspend NULL 2509#define vmbus_bus_resume NULL 2510#endif /* CONFIG_PM_SLEEP */ 2511 2512static const struct acpi_device_id vmbus_acpi_device_ids[] = { 2513 {"VMBUS", 0}, 2514 {"VMBus", 0}, 2515 {"", 0}, 2516}; 2517MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids); 2518 2519/* 2520 * Note: we must use the "no_irq" ops, otherwise hibernation can not work with 2521 * PCI device assignment, because "pci_dev_pm_ops" uses the "noirq" ops: in 2522 * the resume path, the pci "noirq" restore op runs before "non-noirq" op (see 2523 * resume_target_kernel() -> dpm_resume_start(), and hibernation_restore() -> 2524 * dpm_resume_end()). This means vmbus_bus_resume() and the pci-hyperv's 2525 * resume callback must also run via the "noirq" ops. 2526 * 2527 * Set suspend_noirq/resume_noirq to NULL for Suspend-to-Idle: see the comment 2528 * earlier in this file before vmbus_pm. 2529 */ 2530 2531static const struct dev_pm_ops vmbus_bus_pm = { 2532 .suspend_noirq = NULL, 2533 .resume_noirq = NULL, 2534 .freeze_noirq = vmbus_bus_suspend, 2535 .thaw_noirq = vmbus_bus_resume, 2536 .poweroff_noirq = vmbus_bus_suspend, 2537 .restore_noirq = vmbus_bus_resume 2538}; 2539 2540static struct acpi_driver vmbus_acpi_driver = { 2541 .name = "vmbus", 2542 .ids = vmbus_acpi_device_ids, 2543 .ops = { 2544 .add = vmbus_acpi_add, 2545 .remove = vmbus_acpi_remove, 2546 }, 2547 .drv.pm = &vmbus_bus_pm, 2548}; 2549 2550static void hv_kexec_handler(void) 2551{ 2552 hv_stimer_global_cleanup(); 2553 vmbus_initiate_unload(false); 2554 /* Make sure conn_state is set as hv_synic_cleanup checks for it */ 2555 mb(); 2556 cpuhp_remove_state(hyperv_cpuhp_online); 2557}; 2558 2559static void hv_crash_handler(struct pt_regs *regs) 2560{ 2561 int cpu; 2562 2563 vmbus_initiate_unload(true); 2564 /* 2565 * In crash handler we can't schedule synic cleanup for all CPUs, 2566 * doing the cleanup for current CPU only. This should be sufficient 2567 * for kdump. 2568 */ 2569 cpu = smp_processor_id(); 2570 hv_stimer_cleanup(cpu); 2571 hv_synic_disable_regs(cpu); 2572}; 2573 2574static int hv_synic_suspend(void) 2575{ 2576 /* 2577 * When we reach here, all the non-boot CPUs have been offlined. 2578 * If we're in a legacy configuration where stimer Direct Mode is 2579 * not enabled, the stimers on the non-boot CPUs have been unbound 2580 * in hv_synic_cleanup() -> hv_stimer_legacy_cleanup() -> 2581 * hv_stimer_cleanup() -> clockevents_unbind_device(). 2582 * 2583 * hv_synic_suspend() only runs on CPU0 with interrupts disabled. 2584 * Here we do not call hv_stimer_legacy_cleanup() on CPU0 because: 2585 * 1) it's unnecessary as interrupts remain disabled between 2586 * syscore_suspend() and syscore_resume(): see create_image() and 2587 * resume_target_kernel() 2588 * 2) the stimer on CPU0 is automatically disabled later by 2589 * syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ... 2590 * -> clockevents_shutdown() -> ... -> hv_ce_shutdown() 2591 * 3) a warning would be triggered if we call 2592 * clockevents_unbind_device(), which may sleep, in an 2593 * interrupts-disabled context. 2594 */ 2595 2596 hv_synic_disable_regs(0); 2597 2598 return 0; 2599} 2600 2601static void hv_synic_resume(void) 2602{ 2603 hv_synic_enable_regs(0); 2604 2605 /* 2606 * Note: we don't need to call hv_stimer_init(0), because the timer 2607 * on CPU0 is not unbound in hv_synic_suspend(), and the timer is 2608 * automatically re-enabled in timekeeping_resume(). 2609 */ 2610} 2611 2612/* The callbacks run only on CPU0, with irqs_disabled. */ 2613static struct syscore_ops hv_synic_syscore_ops = { 2614 .suspend = hv_synic_suspend, 2615 .resume = hv_synic_resume, 2616}; 2617 2618static int __init hv_acpi_init(void) 2619{ 2620 int ret, t; 2621 2622 if (!hv_is_hyperv_initialized()) 2623 return -ENODEV; 2624 2625 init_completion(&probe_event); 2626 2627 /* 2628 * Get ACPI resources first. 2629 */ 2630 ret = acpi_bus_register_driver(&vmbus_acpi_driver); 2631 2632 if (ret) 2633 return ret; 2634 2635 t = wait_for_completion_timeout(&probe_event, 5*HZ); 2636 if (t == 0) { 2637 ret = -ETIMEDOUT; 2638 goto cleanup; 2639 } 2640 hv_debug_init(); 2641 2642 ret = vmbus_bus_init(); 2643 if (ret) 2644 goto cleanup; 2645 2646 hv_setup_kexec_handler(hv_kexec_handler); 2647 hv_setup_crash_handler(hv_crash_handler); 2648 2649 register_syscore_ops(&hv_synic_syscore_ops); 2650 2651 return 0; 2652 2653cleanup: 2654 acpi_bus_unregister_driver(&vmbus_acpi_driver); 2655 hv_acpi_dev = NULL; 2656 return ret; 2657} 2658 2659static void __exit vmbus_exit(void) 2660{ 2661 int cpu; 2662 2663 unregister_syscore_ops(&hv_synic_syscore_ops); 2664 2665 hv_remove_kexec_handler(); 2666 hv_remove_crash_handler(); 2667 vmbus_connection.conn_state = DISCONNECTED; 2668 hv_stimer_global_cleanup(); 2669 vmbus_disconnect(); 2670 hv_remove_vmbus_irq(); 2671 for_each_online_cpu(cpu) { 2672 struct hv_per_cpu_context *hv_cpu 2673 = per_cpu_ptr(hv_context.cpu_context, cpu); 2674 2675 tasklet_kill(&hv_cpu->msg_dpc); 2676 } 2677 hv_debug_rm_all_dir(); 2678 2679 vmbus_free_channels(); 2680 kfree(vmbus_connection.channels); 2681 2682 if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { 2683 kmsg_dump_unregister(&hv_kmsg_dumper); 2684 unregister_die_notifier(&hyperv_die_block); 2685 } 2686 2687 /* 2688 * The panic notifier is always registered, hence we should 2689 * also unconditionally unregister it here as well. 2690 */ 2691 atomic_notifier_chain_unregister(&panic_notifier_list, 2692 &hyperv_panic_block); 2693 2694 free_page((unsigned long)hv_panic_page); 2695 unregister_sysctl_table(hv_ctl_table_hdr); 2696 hv_ctl_table_hdr = NULL; 2697 bus_unregister(&hv_bus); 2698 2699 cpuhp_remove_state(hyperv_cpuhp_online); 2700 hv_synic_free(); 2701 acpi_bus_unregister_driver(&vmbus_acpi_driver); 2702} 2703 2704 2705MODULE_LICENSE("GPL"); 2706MODULE_DESCRIPTION("Microsoft Hyper-V VMBus Driver"); 2707 2708subsys_initcall(hv_acpi_init); 2709module_exit(vmbus_exit); 2710