1// SPDX-License-Identifier: GPL-2.0 2/* 3 * PCI Message Signaled Interrupt (MSI) 4 * 5 * Copyright (C) 2003-2004 Intel 6 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) 7 * Copyright (C) 2016 Christoph Hellwig. 8 */ 9 10#include <linux/err.h> 11#include <linux/mm.h> 12#include <linux/irq.h> 13#include <linux/interrupt.h> 14#include <linux/export.h> 15#include <linux/ioport.h> 16#include <linux/pci.h> 17#include <linux/proc_fs.h> 18#include <linux/msi.h> 19#include <linux/smp.h> 20#include <linux/errno.h> 21#include <linux/io.h> 22#include <linux/acpi_iort.h> 23#include <linux/slab.h> 24#include <linux/irqdomain.h> 25#include <linux/of_irq.h> 26 27#include "pci.h" 28 29static int pci_msi_enable = 1; 30int pci_msi_ignore_mask; 31 32#define msix_table_size(flags) ((flags & PCI_MSIX_FLAGS_QSIZE) + 1) 33 34#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN 35static int pci_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 36{ 37 struct irq_domain *domain; 38 39 domain = dev_get_msi_domain(&dev->dev); 40 if (domain && irq_domain_is_hierarchy(domain)) 41 return msi_domain_alloc_irqs(domain, &dev->dev, nvec); 42 43 return arch_setup_msi_irqs(dev, nvec, type); 44} 45 46static void pci_msi_teardown_msi_irqs(struct pci_dev *dev) 47{ 48 struct irq_domain *domain; 49 50 domain = dev_get_msi_domain(&dev->dev); 51 if (domain && irq_domain_is_hierarchy(domain)) 52 msi_domain_free_irqs(domain, &dev->dev); 53 else 54 arch_teardown_msi_irqs(dev); 55} 56#else 57#define pci_msi_setup_msi_irqs arch_setup_msi_irqs 58#define pci_msi_teardown_msi_irqs arch_teardown_msi_irqs 59#endif 60 61#ifdef CONFIG_PCI_MSI_ARCH_FALLBACKS 62/* Arch hooks */ 63int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) 64{ 65 struct msi_controller *chip = dev->bus->msi; 66 int err; 67 68 if (!chip || !chip->setup_irq) 69 return -EINVAL; 70 71 err = chip->setup_irq(chip, dev, desc); 72 if (err < 0) 73 return err; 74 75 irq_set_chip_data(desc->irq, chip); 76 77 return 0; 78} 79 80void __weak arch_teardown_msi_irq(unsigned int irq) 81{ 82 struct msi_controller *chip = irq_get_chip_data(irq); 83 84 if (!chip || !chip->teardown_irq) 85 return; 86 87 chip->teardown_irq(chip, irq); 88} 89 90int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 91{ 92 struct msi_controller *chip = dev->bus->msi; 93 struct msi_desc *entry; 94 int ret; 95 96 if (chip && chip->setup_irqs) 97 return chip->setup_irqs(chip, dev, nvec, type); 98 /* 99 * If an architecture wants to support multiple MSI, it needs to 100 * override arch_setup_msi_irqs() 101 */ 102 if (type == PCI_CAP_ID_MSI && nvec > 1) 103 return 1; 104 105 for_each_pci_msi_entry(entry, dev) { 106 ret = arch_setup_msi_irq(dev, entry); 107 if (ret < 0) 108 return ret; 109 if (ret > 0) 110 return -ENOSPC; 111 } 112 113 return 0; 114} 115 116/* 117 * We have a default implementation available as a separate non-weak 118 * function, as it is used by the Xen x86 PCI code 119 */ 120void default_teardown_msi_irqs(struct pci_dev *dev) 121{ 122 int i; 123 struct msi_desc *entry; 124 125 for_each_pci_msi_entry(entry, dev) 126 if (entry->irq) 127 for (i = 0; i < entry->nvec_used; i++) 128 arch_teardown_msi_irq(entry->irq + i); 129} 130 131void __weak arch_teardown_msi_irqs(struct pci_dev *dev) 132{ 133 return default_teardown_msi_irqs(dev); 134} 135#endif /* CONFIG_PCI_MSI_ARCH_FALLBACKS */ 136 137static void default_restore_msi_irq(struct pci_dev *dev, int irq) 138{ 139 struct msi_desc *entry; 140 141 entry = NULL; 142 if (dev->msix_enabled) { 143 for_each_pci_msi_entry(entry, dev) { 144 if (irq == entry->irq) 145 break; 146 } 147 } else if (dev->msi_enabled) { 148 entry = irq_get_msi_desc(irq); 149 } 150 151 if (entry) 152 __pci_write_msi_msg(entry, &entry->msg); 153} 154 155void __weak arch_restore_msi_irqs(struct pci_dev *dev) 156{ 157 return default_restore_msi_irqs(dev); 158} 159 160static inline __attribute_const__ u32 msi_mask(unsigned x) 161{ 162 /* Don't shift by >= width of type */ 163 if (x >= 5) 164 return 0xffffffff; 165 return (1 << (1 << x)) - 1; 166} 167 168/* 169 * PCI 2.3 does not specify mask bits for each MSI interrupt. Attempting to 170 * mask all MSI interrupts by clearing the MSI enable bit does not work 171 * reliably as devices without an INTx disable bit will then generate a 172 * level IRQ which will never be cleared. 173 */ 174void __pci_msi_desc_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) 175{ 176 raw_spinlock_t *lock = &desc->dev->msi_lock; 177 unsigned long flags; 178 179 if (pci_msi_ignore_mask || !desc->msi_attrib.maskbit) 180 return; 181 182 raw_spin_lock_irqsave(lock, flags); 183 desc->masked &= ~mask; 184 desc->masked |= flag; 185 pci_write_config_dword(msi_desc_to_pci_dev(desc), desc->mask_pos, 186 desc->masked); 187 raw_spin_unlock_irqrestore(lock, flags); 188} 189 190static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) 191{ 192 __pci_msi_desc_mask_irq(desc, mask, flag); 193} 194 195static void __iomem *pci_msix_desc_addr(struct msi_desc *desc) 196{ 197 if (desc->msi_attrib.is_virtual) 198 return NULL; 199 200 return desc->mask_base + 201 desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; 202} 203 204/* 205 * This internal function does not flush PCI writes to the device. 206 * All users must ensure that they read from the device before either 207 * assuming that the device state is up to date, or returning out of this 208 * file. This saves a few milliseconds when initialising devices with lots 209 * of MSI-X interrupts. 210 */ 211u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag) 212{ 213 u32 mask_bits = desc->masked; 214 void __iomem *desc_addr; 215 216 if (pci_msi_ignore_mask) 217 return 0; 218 219 desc_addr = pci_msix_desc_addr(desc); 220 if (!desc_addr) 221 return 0; 222 223 mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT; 224 if (flag & PCI_MSIX_ENTRY_CTRL_MASKBIT) 225 mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT; 226 227 writel(mask_bits, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL); 228 229 return mask_bits; 230} 231 232static void msix_mask_irq(struct msi_desc *desc, u32 flag) 233{ 234 desc->masked = __pci_msix_desc_mask_irq(desc, flag); 235} 236 237static void msi_set_mask_bit(struct irq_data *data, u32 flag) 238{ 239 struct msi_desc *desc = irq_data_get_msi_desc(data); 240 241 if (desc->msi_attrib.is_msix) { 242 msix_mask_irq(desc, flag); 243 readl(desc->mask_base); /* Flush write to device */ 244 } else { 245 unsigned offset = data->irq - desc->irq; 246 msi_mask_irq(desc, 1 << offset, flag << offset); 247 } 248} 249 250/** 251 * pci_msi_mask_irq - Generic IRQ chip callback to mask PCI/MSI interrupts 252 * @data: pointer to irqdata associated to that interrupt 253 */ 254void pci_msi_mask_irq(struct irq_data *data) 255{ 256 msi_set_mask_bit(data, 1); 257} 258EXPORT_SYMBOL_GPL(pci_msi_mask_irq); 259 260/** 261 * pci_msi_unmask_irq - Generic IRQ chip callback to unmask PCI/MSI interrupts 262 * @data: pointer to irqdata associated to that interrupt 263 */ 264void pci_msi_unmask_irq(struct irq_data *data) 265{ 266 msi_set_mask_bit(data, 0); 267} 268EXPORT_SYMBOL_GPL(pci_msi_unmask_irq); 269 270void default_restore_msi_irqs(struct pci_dev *dev) 271{ 272 struct msi_desc *entry; 273 274 for_each_pci_msi_entry(entry, dev) 275 default_restore_msi_irq(dev, entry->irq); 276} 277 278void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg) 279{ 280 struct pci_dev *dev = msi_desc_to_pci_dev(entry); 281 282 BUG_ON(dev->current_state != PCI_D0); 283 284 if (entry->msi_attrib.is_msix) { 285 void __iomem *base = pci_msix_desc_addr(entry); 286 287 if (!base) { 288 WARN_ON(1); 289 return; 290 } 291 292 msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR); 293 msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR); 294 msg->data = readl(base + PCI_MSIX_ENTRY_DATA); 295 } else { 296 int pos = dev->msi_cap; 297 u16 data; 298 299 pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, 300 &msg->address_lo); 301 if (entry->msi_attrib.is_64) { 302 pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, 303 &msg->address_hi); 304 pci_read_config_word(dev, pos + PCI_MSI_DATA_64, &data); 305 } else { 306 msg->address_hi = 0; 307 pci_read_config_word(dev, pos + PCI_MSI_DATA_32, &data); 308 } 309 msg->data = data; 310 } 311} 312 313void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) 314{ 315 struct pci_dev *dev = msi_desc_to_pci_dev(entry); 316 317 if (dev->current_state != PCI_D0 || pci_dev_is_disconnected(dev)) { 318 /* Don't touch the hardware now */ 319 } else if (entry->msi_attrib.is_msix) { 320 void __iomem *base = pci_msix_desc_addr(entry); 321 bool unmasked = !(entry->masked & PCI_MSIX_ENTRY_CTRL_MASKBIT); 322 323 if (!base) 324 goto skip; 325 326 /* 327 * The specification mandates that the entry is masked 328 * when the message is modified: 329 * 330 * "If software changes the Address or Data value of an 331 * entry while the entry is unmasked, the result is 332 * undefined." 333 */ 334 if (unmasked) 335 __pci_msix_desc_mask_irq(entry, PCI_MSIX_ENTRY_CTRL_MASKBIT); 336 337 writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR); 338 writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR); 339 writel(msg->data, base + PCI_MSIX_ENTRY_DATA); 340 341 if (unmasked) 342 __pci_msix_desc_mask_irq(entry, 0); 343 344 /* Ensure that the writes are visible in the device */ 345 readl(base + PCI_MSIX_ENTRY_DATA); 346 } else { 347 int pos = dev->msi_cap; 348 u16 msgctl; 349 350 pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); 351 msgctl &= ~PCI_MSI_FLAGS_QSIZE; 352 msgctl |= entry->msi_attrib.multiple << 4; 353 pci_write_config_word(dev, pos + PCI_MSI_FLAGS, msgctl); 354 355 pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, 356 msg->address_lo); 357 if (entry->msi_attrib.is_64) { 358 pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, 359 msg->address_hi); 360 pci_write_config_word(dev, pos + PCI_MSI_DATA_64, 361 msg->data); 362 } else { 363 pci_write_config_word(dev, pos + PCI_MSI_DATA_32, 364 msg->data); 365 } 366 /* Ensure that the writes are visible in the device */ 367 pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); 368 } 369 370skip: 371 entry->msg = *msg; 372 373 if (entry->write_msi_msg) 374 entry->write_msi_msg(entry, entry->write_msi_msg_data); 375 376} 377 378void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) 379{ 380 struct msi_desc *entry = irq_get_msi_desc(irq); 381 382 __pci_write_msi_msg(entry, msg); 383} 384EXPORT_SYMBOL_GPL(pci_write_msi_msg); 385 386static void free_msi_irqs(struct pci_dev *dev) 387{ 388 struct list_head *msi_list = dev_to_msi_list(&dev->dev); 389 struct msi_desc *entry, *tmp; 390 struct attribute **msi_attrs; 391 struct device_attribute *dev_attr; 392 int i, count = 0; 393 394 for_each_pci_msi_entry(entry, dev) 395 if (entry->irq) 396 for (i = 0; i < entry->nvec_used; i++) 397 BUG_ON(irq_has_action(entry->irq + i)); 398 399 if (dev->msi_irq_groups) { 400 sysfs_remove_groups(&dev->dev.kobj, dev->msi_irq_groups); 401 msi_attrs = dev->msi_irq_groups[0]->attrs; 402 while (msi_attrs[count]) { 403 dev_attr = container_of(msi_attrs[count], 404 struct device_attribute, attr); 405 kfree(dev_attr->attr.name); 406 kfree(dev_attr); 407 ++count; 408 } 409 kfree(msi_attrs); 410 kfree(dev->msi_irq_groups[0]); 411 kfree(dev->msi_irq_groups); 412 dev->msi_irq_groups = NULL; 413 } 414 415 pci_msi_teardown_msi_irqs(dev); 416 417 list_for_each_entry_safe(entry, tmp, msi_list, list) { 418 if (entry->msi_attrib.is_msix) { 419 if (list_is_last(&entry->list, msi_list)) 420 iounmap(entry->mask_base); 421 } 422 423 list_del(&entry->list); 424 free_msi_entry(entry); 425 } 426} 427 428static void pci_intx_for_msi(struct pci_dev *dev, int enable) 429{ 430 if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG)) 431 pci_intx(dev, enable); 432} 433 434static void __pci_restore_msi_state(struct pci_dev *dev) 435{ 436 u16 control; 437 struct msi_desc *entry; 438 439 if (!dev->msi_enabled) 440 return; 441 442 entry = irq_get_msi_desc(dev->irq); 443 444 pci_intx_for_msi(dev, 0); 445 pci_msi_set_enable(dev, 0); 446 arch_restore_msi_irqs(dev); 447 448 pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control); 449 msi_mask_irq(entry, msi_mask(entry->msi_attrib.multi_cap), 450 entry->masked); 451 control &= ~PCI_MSI_FLAGS_QSIZE; 452 control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE; 453 pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control); 454} 455 456static void __pci_restore_msix_state(struct pci_dev *dev) 457{ 458 struct msi_desc *entry; 459 460 if (!dev->msix_enabled) 461 return; 462 BUG_ON(list_empty(dev_to_msi_list(&dev->dev))); 463 464 /* route the table */ 465 pci_intx_for_msi(dev, 0); 466 pci_msix_clear_and_set_ctrl(dev, 0, 467 PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL); 468 469 arch_restore_msi_irqs(dev); 470 for_each_pci_msi_entry(entry, dev) 471 msix_mask_irq(entry, entry->masked); 472 473 pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0); 474} 475 476void pci_restore_msi_state(struct pci_dev *dev) 477{ 478 __pci_restore_msi_state(dev); 479 __pci_restore_msix_state(dev); 480} 481EXPORT_SYMBOL_GPL(pci_restore_msi_state); 482 483static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr, 484 char *buf) 485{ 486 struct msi_desc *entry; 487 unsigned long irq; 488 int retval; 489 490 retval = kstrtoul(attr->attr.name, 10, &irq); 491 if (retval) 492 return retval; 493 494 entry = irq_get_msi_desc(irq); 495 if (entry) 496 return sprintf(buf, "%s\n", 497 entry->msi_attrib.is_msix ? "msix" : "msi"); 498 499 return -ENODEV; 500} 501 502static int populate_msi_sysfs(struct pci_dev *pdev) 503{ 504 struct attribute **msi_attrs; 505 struct attribute *msi_attr; 506 struct device_attribute *msi_dev_attr; 507 struct attribute_group *msi_irq_group; 508 const struct attribute_group **msi_irq_groups; 509 struct msi_desc *entry; 510 int ret = -ENOMEM; 511 int num_msi = 0; 512 int count = 0; 513 int i; 514 515 /* Determine how many msi entries we have */ 516 for_each_pci_msi_entry(entry, pdev) 517 num_msi += entry->nvec_used; 518 if (!num_msi) 519 return 0; 520 521 /* Dynamically create the MSI attributes for the PCI device */ 522 msi_attrs = kcalloc(num_msi + 1, sizeof(void *), GFP_KERNEL); 523 if (!msi_attrs) 524 return -ENOMEM; 525 for_each_pci_msi_entry(entry, pdev) { 526 for (i = 0; i < entry->nvec_used; i++) { 527 msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL); 528 if (!msi_dev_attr) 529 goto error_attrs; 530 msi_attrs[count] = &msi_dev_attr->attr; 531 532 sysfs_attr_init(&msi_dev_attr->attr); 533 msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d", 534 entry->irq + i); 535 if (!msi_dev_attr->attr.name) 536 goto error_attrs; 537 msi_dev_attr->attr.mode = S_IRUGO; 538 msi_dev_attr->show = msi_mode_show; 539 ++count; 540 } 541 } 542 543 msi_irq_group = kzalloc(sizeof(*msi_irq_group), GFP_KERNEL); 544 if (!msi_irq_group) 545 goto error_attrs; 546 msi_irq_group->name = "msi_irqs"; 547 msi_irq_group->attrs = msi_attrs; 548 549 msi_irq_groups = kcalloc(2, sizeof(void *), GFP_KERNEL); 550 if (!msi_irq_groups) 551 goto error_irq_group; 552 msi_irq_groups[0] = msi_irq_group; 553 554 ret = sysfs_create_groups(&pdev->dev.kobj, msi_irq_groups); 555 if (ret) 556 goto error_irq_groups; 557 pdev->msi_irq_groups = msi_irq_groups; 558 559 return 0; 560 561error_irq_groups: 562 kfree(msi_irq_groups); 563error_irq_group: 564 kfree(msi_irq_group); 565error_attrs: 566 count = 0; 567 msi_attr = msi_attrs[count]; 568 while (msi_attr) { 569 msi_dev_attr = container_of(msi_attr, struct device_attribute, attr); 570 kfree(msi_attr->name); 571 kfree(msi_dev_attr); 572 ++count; 573 msi_attr = msi_attrs[count]; 574 } 575 kfree(msi_attrs); 576 return ret; 577} 578 579static struct msi_desc * 580msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd) 581{ 582 struct irq_affinity_desc *masks = NULL; 583 struct msi_desc *entry; 584 u16 control; 585 586 if (affd) 587 masks = irq_create_affinity_masks(nvec, affd); 588 589 /* MSI Entry Initialization */ 590 entry = alloc_msi_entry(&dev->dev, nvec, masks); 591 if (!entry) 592 goto out; 593 594 pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control); 595 /* Lies, damned lies, and MSIs */ 596 if (dev->dev_flags & PCI_DEV_FLAGS_HAS_MSI_MASKING) 597 control |= PCI_MSI_FLAGS_MASKBIT; 598 599 entry->msi_attrib.is_msix = 0; 600 entry->msi_attrib.is_64 = !!(control & PCI_MSI_FLAGS_64BIT); 601 entry->msi_attrib.is_virtual = 0; 602 entry->msi_attrib.entry_nr = 0; 603 entry->msi_attrib.maskbit = !!(control & PCI_MSI_FLAGS_MASKBIT); 604 entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */ 605 entry->msi_attrib.multi_cap = (control & PCI_MSI_FLAGS_QMASK) >> 1; 606 entry->msi_attrib.multiple = ilog2(__roundup_pow_of_two(nvec)); 607 608 if (control & PCI_MSI_FLAGS_64BIT) 609 entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64; 610 else 611 entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_32; 612 613 /* Save the initial mask status */ 614 if (entry->msi_attrib.maskbit) 615 pci_read_config_dword(dev, entry->mask_pos, &entry->masked); 616 617out: 618 kfree(masks); 619 return entry; 620} 621 622static int msi_verify_entries(struct pci_dev *dev) 623{ 624 struct msi_desc *entry; 625 626 for_each_pci_msi_entry(entry, dev) { 627 if (!dev->no_64bit_msi || !entry->msg.address_hi) 628 continue; 629 pci_err(dev, "Device has broken 64-bit MSI but arch" 630 " tried to assign one above 4G\n"); 631 return -EIO; 632 } 633 return 0; 634} 635 636/** 637 * msi_capability_init - configure device's MSI capability structure 638 * @dev: pointer to the pci_dev data structure of MSI device function 639 * @nvec: number of interrupts to allocate 640 * @affd: description of automatic IRQ affinity assignments (may be %NULL) 641 * 642 * Setup the MSI capability structure of the device with the requested 643 * number of interrupts. A return value of zero indicates the successful 644 * setup of an entry with the new MSI IRQ. A negative return value indicates 645 * an error, and a positive return value indicates the number of interrupts 646 * which could have been allocated. 647 */ 648static int msi_capability_init(struct pci_dev *dev, int nvec, 649 struct irq_affinity *affd) 650{ 651 struct msi_desc *entry; 652 int ret; 653 unsigned mask; 654 655 pci_msi_set_enable(dev, 0); /* Disable MSI during set up */ 656 657 entry = msi_setup_entry(dev, nvec, affd); 658 if (!entry) 659 return -ENOMEM; 660 661 /* All MSIs are unmasked by default; mask them all */ 662 mask = msi_mask(entry->msi_attrib.multi_cap); 663 msi_mask_irq(entry, mask, mask); 664 665 list_add_tail(&entry->list, dev_to_msi_list(&dev->dev)); 666 667 /* Configure MSI capability structure */ 668 ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI); 669 if (ret) { 670 msi_mask_irq(entry, mask, 0); 671 free_msi_irqs(dev); 672 return ret; 673 } 674 675 ret = msi_verify_entries(dev); 676 if (ret) { 677 msi_mask_irq(entry, mask, 0); 678 free_msi_irqs(dev); 679 return ret; 680 } 681 682 ret = populate_msi_sysfs(dev); 683 if (ret) { 684 msi_mask_irq(entry, mask, 0); 685 free_msi_irqs(dev); 686 return ret; 687 } 688 689 /* Set MSI enabled bits */ 690 pci_intx_for_msi(dev, 0); 691 pci_msi_set_enable(dev, 1); 692 dev->msi_enabled = 1; 693 694 pcibios_free_irq(dev); 695 dev->irq = entry->irq; 696 return 0; 697} 698 699static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries) 700{ 701 resource_size_t phys_addr; 702 u32 table_offset; 703 unsigned long flags; 704 u8 bir; 705 706 pci_read_config_dword(dev, dev->msix_cap + PCI_MSIX_TABLE, 707 &table_offset); 708 bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR); 709 flags = pci_resource_flags(dev, bir); 710 if (!flags || (flags & IORESOURCE_UNSET)) 711 return NULL; 712 713 table_offset &= PCI_MSIX_TABLE_OFFSET; 714 phys_addr = pci_resource_start(dev, bir) + table_offset; 715 716 return ioremap(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE); 717} 718 719static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, 720 struct msix_entry *entries, int nvec, 721 struct irq_affinity *affd) 722{ 723 struct irq_affinity_desc *curmsk, *masks = NULL; 724 struct msi_desc *entry; 725 void __iomem *addr; 726 int ret, i; 727 int vec_count = pci_msix_vec_count(dev); 728 729 if (affd) 730 masks = irq_create_affinity_masks(nvec, affd); 731 732 for (i = 0, curmsk = masks; i < nvec; i++) { 733 entry = alloc_msi_entry(&dev->dev, 1, curmsk); 734 if (!entry) { 735 if (!i) 736 iounmap(base); 737 else 738 free_msi_irqs(dev); 739 /* No enough memory. Don't try again */ 740 ret = -ENOMEM; 741 goto out; 742 } 743 744 entry->msi_attrib.is_msix = 1; 745 entry->msi_attrib.is_64 = 1; 746 747 if (entries) 748 entry->msi_attrib.entry_nr = entries[i].entry; 749 else 750 entry->msi_attrib.entry_nr = i; 751 752 entry->msi_attrib.is_virtual = 753 entry->msi_attrib.entry_nr >= vec_count; 754 755 entry->msi_attrib.default_irq = dev->irq; 756 entry->mask_base = base; 757 758 addr = pci_msix_desc_addr(entry); 759 if (addr) 760 entry->masked = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL); 761 762 list_add_tail(&entry->list, dev_to_msi_list(&dev->dev)); 763 if (masks) 764 curmsk++; 765 } 766 ret = 0; 767out: 768 kfree(masks); 769 return ret; 770} 771 772static void msix_update_entries(struct pci_dev *dev, struct msix_entry *entries) 773{ 774 struct msi_desc *entry; 775 776 for_each_pci_msi_entry(entry, dev) { 777 if (entries) { 778 entries->vector = entry->irq; 779 entries++; 780 } 781 } 782} 783 784static void msix_mask_all(void __iomem *base, int tsize) 785{ 786 u32 ctrl = PCI_MSIX_ENTRY_CTRL_MASKBIT; 787 int i; 788 789 if (pci_msi_ignore_mask) 790 return; 791 792 for (i = 0; i < tsize; i++, base += PCI_MSIX_ENTRY_SIZE) 793 writel(ctrl, base + PCI_MSIX_ENTRY_VECTOR_CTRL); 794} 795 796/** 797 * msix_capability_init - configure device's MSI-X capability 798 * @dev: pointer to the pci_dev data structure of MSI-X device function 799 * @entries: pointer to an array of struct msix_entry entries 800 * @nvec: number of @entries 801 * @affd: Optional pointer to enable automatic affinity assignment 802 * 803 * Setup the MSI-X capability structure of device function with a 804 * single MSI-X IRQ. A return of zero indicates the successful setup of 805 * requested MSI-X entries with allocated IRQs or non-zero for otherwise. 806 **/ 807static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, 808 int nvec, struct irq_affinity *affd) 809{ 810 void __iomem *base; 811 int ret, tsize; 812 u16 control; 813 814 /* 815 * Some devices require MSI-X to be enabled before the MSI-X 816 * registers can be accessed. Mask all the vectors to prevent 817 * interrupts coming in before they're fully set up. 818 */ 819 pci_msix_clear_and_set_ctrl(dev, 0, PCI_MSIX_FLAGS_MASKALL | 820 PCI_MSIX_FLAGS_ENABLE); 821 822 pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control); 823 /* Request & Map MSI-X table region */ 824 tsize = msix_table_size(control); 825 base = msix_map_region(dev, tsize); 826 if (!base) { 827 ret = -ENOMEM; 828 goto out_disable; 829 } 830 831 ret = msix_setup_entries(dev, base, entries, nvec, affd); 832 if (ret) 833 goto out_disable; 834 835 ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX); 836 if (ret) 837 goto out_avail; 838 839 /* Check if all MSI entries honor device restrictions */ 840 ret = msi_verify_entries(dev); 841 if (ret) 842 goto out_free; 843 844 msix_update_entries(dev, entries); 845 846 ret = populate_msi_sysfs(dev); 847 if (ret) 848 goto out_free; 849 850 /* Set MSI-X enabled bits and unmask the function */ 851 pci_intx_for_msi(dev, 0); 852 dev->msix_enabled = 1; 853 854 /* 855 * Ensure that all table entries are masked to prevent 856 * stale entries from firing in a crash kernel. 857 * 858 * Done late to deal with a broken Marvell NVME device 859 * which takes the MSI-X mask bits into account even 860 * when MSI-X is disabled, which prevents MSI delivery. 861 */ 862 msix_mask_all(base, tsize); 863 pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0); 864 865 pcibios_free_irq(dev); 866 return 0; 867 868out_avail: 869 if (ret < 0) { 870 /* 871 * If we had some success, report the number of IRQs 872 * we succeeded in setting up. 873 */ 874 struct msi_desc *entry; 875 int avail = 0; 876 877 for_each_pci_msi_entry(entry, dev) { 878 if (entry->irq != 0) 879 avail++; 880 } 881 if (avail != 0) 882 ret = avail; 883 } 884 885out_free: 886 free_msi_irqs(dev); 887 888out_disable: 889 pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE, 0); 890 891 return ret; 892} 893 894/** 895 * pci_msi_supported - check whether MSI may be enabled on a device 896 * @dev: pointer to the pci_dev data structure of MSI device function 897 * @nvec: how many MSIs have been requested? 898 * 899 * Look at global flags, the device itself, and its parent buses 900 * to determine if MSI/-X are supported for the device. If MSI/-X is 901 * supported return 1, else return 0. 902 **/ 903static int pci_msi_supported(struct pci_dev *dev, int nvec) 904{ 905 struct pci_bus *bus; 906 907 /* MSI must be globally enabled and supported by the device */ 908 if (!pci_msi_enable) 909 return 0; 910 911 if (!dev || dev->no_msi) 912 return 0; 913 914 /* 915 * You can't ask to have 0 or less MSIs configured. 916 * a) it's stupid .. 917 * b) the list manipulation code assumes nvec >= 1. 918 */ 919 if (nvec < 1) 920 return 0; 921 922 /* 923 * Any bridge which does NOT route MSI transactions from its 924 * secondary bus to its primary bus must set NO_MSI flag on 925 * the secondary pci_bus. 926 * We expect only arch-specific PCI host bus controller driver 927 * or quirks for specific PCI bridges to be setting NO_MSI. 928 */ 929 for (bus = dev->bus; bus; bus = bus->parent) 930 if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI) 931 return 0; 932 933 return 1; 934} 935 936/** 937 * pci_msi_vec_count - Return the number of MSI vectors a device can send 938 * @dev: device to report about 939 * 940 * This function returns the number of MSI vectors a device requested via 941 * Multiple Message Capable register. It returns a negative errno if the 942 * device is not capable sending MSI interrupts. Otherwise, the call succeeds 943 * and returns a power of two, up to a maximum of 2^5 (32), according to the 944 * MSI specification. 945 **/ 946int pci_msi_vec_count(struct pci_dev *dev) 947{ 948 int ret; 949 u16 msgctl; 950 951 if (!dev->msi_cap) 952 return -EINVAL; 953 954 pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl); 955 ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1); 956 957 return ret; 958} 959EXPORT_SYMBOL(pci_msi_vec_count); 960 961static void pci_msi_shutdown(struct pci_dev *dev) 962{ 963 struct msi_desc *desc; 964 u32 mask; 965 966 if (!pci_msi_enable || !dev || !dev->msi_enabled) 967 return; 968 969 BUG_ON(list_empty(dev_to_msi_list(&dev->dev))); 970 desc = first_pci_msi_entry(dev); 971 972 pci_msi_set_enable(dev, 0); 973 pci_intx_for_msi(dev, 1); 974 dev->msi_enabled = 0; 975 976 /* Return the device with MSI unmasked as initial states */ 977 mask = msi_mask(desc->msi_attrib.multi_cap); 978 msi_mask_irq(desc, mask, 0); 979 980 /* Restore dev->irq to its default pin-assertion IRQ */ 981 dev->irq = desc->msi_attrib.default_irq; 982 pcibios_alloc_irq(dev); 983} 984 985void pci_disable_msi(struct pci_dev *dev) 986{ 987 if (!pci_msi_enable || !dev || !dev->msi_enabled) 988 return; 989 990 pci_msi_shutdown(dev); 991 free_msi_irqs(dev); 992} 993EXPORT_SYMBOL(pci_disable_msi); 994 995/** 996 * pci_msix_vec_count - return the number of device's MSI-X table entries 997 * @dev: pointer to the pci_dev data structure of MSI-X device function 998 * This function returns the number of device's MSI-X table entries and 999 * therefore the number of MSI-X vectors device is capable of sending. 1000 * It returns a negative errno if the device is not capable of sending MSI-X 1001 * interrupts. 1002 **/ 1003int pci_msix_vec_count(struct pci_dev *dev) 1004{ 1005 u16 control; 1006 1007 if (!dev->msix_cap) 1008 return -EINVAL; 1009 1010 pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control); 1011 return msix_table_size(control); 1012} 1013EXPORT_SYMBOL(pci_msix_vec_count); 1014 1015static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, 1016 int nvec, struct irq_affinity *affd, int flags) 1017{ 1018 int nr_entries; 1019 int i, j; 1020 1021 if (!pci_msi_supported(dev, nvec) || dev->current_state != PCI_D0) 1022 return -EINVAL; 1023 1024 nr_entries = pci_msix_vec_count(dev); 1025 if (nr_entries < 0) 1026 return nr_entries; 1027 if (nvec > nr_entries && !(flags & PCI_IRQ_VIRTUAL)) 1028 return nr_entries; 1029 1030 if (entries) { 1031 /* Check for any invalid entries */ 1032 for (i = 0; i < nvec; i++) { 1033 if (entries[i].entry >= nr_entries) 1034 return -EINVAL; /* invalid entry */ 1035 for (j = i + 1; j < nvec; j++) { 1036 if (entries[i].entry == entries[j].entry) 1037 return -EINVAL; /* duplicate entry */ 1038 } 1039 } 1040 } 1041 1042 /* Check whether driver already requested for MSI IRQ */ 1043 if (dev->msi_enabled) { 1044 pci_info(dev, "can't enable MSI-X (MSI IRQ already assigned)\n"); 1045 return -EINVAL; 1046 } 1047 return msix_capability_init(dev, entries, nvec, affd); 1048} 1049 1050static void pci_msix_shutdown(struct pci_dev *dev) 1051{ 1052 struct msi_desc *entry; 1053 1054 if (!pci_msi_enable || !dev || !dev->msix_enabled) 1055 return; 1056 1057 if (pci_dev_is_disconnected(dev)) { 1058 dev->msix_enabled = 0; 1059 return; 1060 } 1061 1062 /* Return the device with MSI-X masked as initial states */ 1063 for_each_pci_msi_entry(entry, dev) 1064 __pci_msix_desc_mask_irq(entry, 1); 1065 1066 pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0); 1067 pci_intx_for_msi(dev, 1); 1068 dev->msix_enabled = 0; 1069 pcibios_alloc_irq(dev); 1070} 1071 1072void pci_disable_msix(struct pci_dev *dev) 1073{ 1074 if (!pci_msi_enable || !dev || !dev->msix_enabled) 1075 return; 1076 1077 pci_msix_shutdown(dev); 1078 free_msi_irqs(dev); 1079} 1080EXPORT_SYMBOL(pci_disable_msix); 1081 1082void pci_no_msi(void) 1083{ 1084 pci_msi_enable = 0; 1085} 1086 1087/** 1088 * pci_msi_enabled - is MSI enabled? 1089 * 1090 * Returns true if MSI has not been disabled by the command-line option 1091 * pci=nomsi. 1092 **/ 1093int pci_msi_enabled(void) 1094{ 1095 return pci_msi_enable; 1096} 1097EXPORT_SYMBOL(pci_msi_enabled); 1098 1099static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, 1100 struct irq_affinity *affd) 1101{ 1102 int nvec; 1103 int rc; 1104 1105 if (!pci_msi_supported(dev, minvec) || dev->current_state != PCI_D0) 1106 return -EINVAL; 1107 1108 /* Check whether driver already requested MSI-X IRQs */ 1109 if (dev->msix_enabled) { 1110 pci_info(dev, "can't enable MSI (MSI-X already enabled)\n"); 1111 return -EINVAL; 1112 } 1113 1114 if (maxvec < minvec) 1115 return -ERANGE; 1116 1117 if (WARN_ON_ONCE(dev->msi_enabled)) 1118 return -EINVAL; 1119 1120 nvec = pci_msi_vec_count(dev); 1121 if (nvec < 0) 1122 return nvec; 1123 if (nvec < minvec) 1124 return -ENOSPC; 1125 1126 if (nvec > maxvec) 1127 nvec = maxvec; 1128 1129 for (;;) { 1130 if (affd) { 1131 nvec = irq_calc_affinity_vectors(minvec, nvec, affd); 1132 if (nvec < minvec) 1133 return -ENOSPC; 1134 } 1135 1136 rc = msi_capability_init(dev, nvec, affd); 1137 if (rc == 0) 1138 return nvec; 1139 1140 if (rc < 0) 1141 return rc; 1142 if (rc < minvec) 1143 return -ENOSPC; 1144 1145 nvec = rc; 1146 } 1147} 1148 1149/* deprecated, don't use */ 1150int pci_enable_msi(struct pci_dev *dev) 1151{ 1152 int rc = __pci_enable_msi_range(dev, 1, 1, NULL); 1153 if (rc < 0) 1154 return rc; 1155 return 0; 1156} 1157EXPORT_SYMBOL(pci_enable_msi); 1158 1159static int __pci_enable_msix_range(struct pci_dev *dev, 1160 struct msix_entry *entries, int minvec, 1161 int maxvec, struct irq_affinity *affd, 1162 int flags) 1163{ 1164 int rc, nvec = maxvec; 1165 1166 if (maxvec < minvec) 1167 return -ERANGE; 1168 1169 if (WARN_ON_ONCE(dev->msix_enabled)) 1170 return -EINVAL; 1171 1172 for (;;) { 1173 if (affd) { 1174 nvec = irq_calc_affinity_vectors(minvec, nvec, affd); 1175 if (nvec < minvec) 1176 return -ENOSPC; 1177 } 1178 1179 rc = __pci_enable_msix(dev, entries, nvec, affd, flags); 1180 if (rc == 0) 1181 return nvec; 1182 1183 if (rc < 0) 1184 return rc; 1185 if (rc < minvec) 1186 return -ENOSPC; 1187 1188 nvec = rc; 1189 } 1190} 1191 1192/** 1193 * pci_enable_msix_range - configure device's MSI-X capability structure 1194 * @dev: pointer to the pci_dev data structure of MSI-X device function 1195 * @entries: pointer to an array of MSI-X entries 1196 * @minvec: minimum number of MSI-X IRQs requested 1197 * @maxvec: maximum number of MSI-X IRQs requested 1198 * 1199 * Setup the MSI-X capability structure of device function with a maximum 1200 * possible number of interrupts in the range between @minvec and @maxvec 1201 * upon its software driver call to request for MSI-X mode enabled on its 1202 * hardware device function. It returns a negative errno if an error occurs. 1203 * If it succeeds, it returns the actual number of interrupts allocated and 1204 * indicates the successful configuration of MSI-X capability structure 1205 * with new allocated MSI-X interrupts. 1206 **/ 1207int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, 1208 int minvec, int maxvec) 1209{ 1210 return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0); 1211} 1212EXPORT_SYMBOL(pci_enable_msix_range); 1213 1214/** 1215 * pci_alloc_irq_vectors_affinity - allocate multiple IRQs for a device 1216 * @dev: PCI device to operate on 1217 * @min_vecs: minimum number of vectors required (must be >= 1) 1218 * @max_vecs: maximum (desired) number of vectors 1219 * @flags: flags or quirks for the allocation 1220 * @affd: optional description of the affinity requirements 1221 * 1222 * Allocate up to @max_vecs interrupt vectors for @dev, using MSI-X or MSI 1223 * vectors if available, and fall back to a single legacy vector 1224 * if neither is available. Return the number of vectors allocated, 1225 * (which might be smaller than @max_vecs) if successful, or a negative 1226 * error code on error. If less than @min_vecs interrupt vectors are 1227 * available for @dev the function will fail with -ENOSPC. 1228 * 1229 * To get the Linux IRQ number used for a vector that can be passed to 1230 * request_irq() use the pci_irq_vector() helper. 1231 */ 1232int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, 1233 unsigned int max_vecs, unsigned int flags, 1234 struct irq_affinity *affd) 1235{ 1236 struct irq_affinity msi_default_affd = {0}; 1237 int nvecs = -ENOSPC; 1238 1239 if (flags & PCI_IRQ_AFFINITY) { 1240 if (!affd) 1241 affd = &msi_default_affd; 1242 } else { 1243 if (WARN_ON(affd)) 1244 affd = NULL; 1245 } 1246 1247 if (flags & PCI_IRQ_MSIX) { 1248 nvecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs, 1249 affd, flags); 1250 if (nvecs > 0) 1251 return nvecs; 1252 } 1253 1254 if (flags & PCI_IRQ_MSI) { 1255 nvecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, affd); 1256 if (nvecs > 0) 1257 return nvecs; 1258 } 1259 1260 /* use legacy IRQ if allowed */ 1261 if (flags & PCI_IRQ_LEGACY) { 1262 if (min_vecs == 1 && dev->irq) { 1263 /* 1264 * Invoke the affinity spreading logic to ensure that 1265 * the device driver can adjust queue configuration 1266 * for the single interrupt case. 1267 */ 1268 if (affd) 1269 irq_create_affinity_masks(1, affd); 1270 pci_intx(dev, 1); 1271 return 1; 1272 } 1273 } 1274 1275 return nvecs; 1276} 1277EXPORT_SYMBOL(pci_alloc_irq_vectors_affinity); 1278 1279/** 1280 * pci_free_irq_vectors - free previously allocated IRQs for a device 1281 * @dev: PCI device to operate on 1282 * 1283 * Undoes the allocations and enabling in pci_alloc_irq_vectors(). 1284 */ 1285void pci_free_irq_vectors(struct pci_dev *dev) 1286{ 1287 pci_disable_msix(dev); 1288 pci_disable_msi(dev); 1289} 1290EXPORT_SYMBOL(pci_free_irq_vectors); 1291 1292/** 1293 * pci_irq_vector - return Linux IRQ number of a device vector 1294 * @dev: PCI device to operate on 1295 * @nr: Interrupt vector index (0-based) 1296 * 1297 * @nr has the following meanings depending on the interrupt mode: 1298 * MSI-X: The index in the MSI-X vector table 1299 * MSI: The index of the enabled MSI vectors 1300 * INTx: Must be 0 1301 * 1302 * Return: The Linux interrupt number or -EINVAl if @nr is out of range. 1303 */ 1304int pci_irq_vector(struct pci_dev *dev, unsigned int nr) 1305{ 1306 if (dev->msix_enabled) { 1307 struct msi_desc *entry; 1308 1309 for_each_pci_msi_entry(entry, dev) { 1310 if (entry->msi_attrib.entry_nr == nr) 1311 return entry->irq; 1312 } 1313 WARN_ON_ONCE(1); 1314 return -EINVAL; 1315 } 1316 1317 if (dev->msi_enabled) { 1318 struct msi_desc *entry = first_pci_msi_entry(dev); 1319 1320 if (WARN_ON_ONCE(nr >= entry->nvec_used)) 1321 return -EINVAL; 1322 } else { 1323 if (WARN_ON_ONCE(nr > 0)) 1324 return -EINVAL; 1325 } 1326 1327 return dev->irq + nr; 1328} 1329EXPORT_SYMBOL(pci_irq_vector); 1330 1331/** 1332 * pci_irq_get_affinity - return the affinity of a particular MSI vector 1333 * @dev: PCI device to operate on 1334 * @nr: device-relative interrupt vector index (0-based). 1335 * 1336 * @nr has the following meanings depending on the interrupt mode: 1337 * MSI-X: The index in the MSI-X vector table 1338 * MSI: The index of the enabled MSI vectors 1339 * INTx: Must be 0 1340 * 1341 * Return: A cpumask pointer or NULL if @nr is out of range 1342 */ 1343const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr) 1344{ 1345 if (dev->msix_enabled) { 1346 struct msi_desc *entry; 1347 1348 for_each_pci_msi_entry(entry, dev) { 1349 if (entry->msi_attrib.entry_nr == nr) 1350 return &entry->affinity->mask; 1351 } 1352 WARN_ON_ONCE(1); 1353 return NULL; 1354 } else if (dev->msi_enabled) { 1355 struct msi_desc *entry = first_pci_msi_entry(dev); 1356 1357 if (WARN_ON_ONCE(!entry || !entry->affinity || 1358 nr >= entry->nvec_used)) 1359 return NULL; 1360 1361 return &entry->affinity[nr].mask; 1362 } else { 1363 return cpu_possible_mask; 1364 } 1365} 1366EXPORT_SYMBOL(pci_irq_get_affinity); 1367 1368struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc) 1369{ 1370 return to_pci_dev(desc->dev); 1371} 1372EXPORT_SYMBOL(msi_desc_to_pci_dev); 1373 1374void *msi_desc_to_pci_sysdata(struct msi_desc *desc) 1375{ 1376 struct pci_dev *dev = msi_desc_to_pci_dev(desc); 1377 1378 return dev->bus->sysdata; 1379} 1380EXPORT_SYMBOL_GPL(msi_desc_to_pci_sysdata); 1381 1382#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN 1383/** 1384 * pci_msi_domain_write_msg - Helper to write MSI message to PCI config space 1385 * @irq_data: Pointer to interrupt data of the MSI interrupt 1386 * @msg: Pointer to the message 1387 */ 1388void pci_msi_domain_write_msg(struct irq_data *irq_data, struct msi_msg *msg) 1389{ 1390 struct msi_desc *desc = irq_data_get_msi_desc(irq_data); 1391 1392 /* 1393 * For MSI-X desc->irq is always equal to irq_data->irq. For 1394 * MSI only the first interrupt of MULTI MSI passes the test. 1395 */ 1396 if (desc->irq == irq_data->irq) 1397 __pci_write_msi_msg(desc, msg); 1398} 1399 1400/** 1401 * pci_msi_domain_calc_hwirq - Generate a unique ID for an MSI source 1402 * @desc: Pointer to the MSI descriptor 1403 * 1404 * The ID number is only used within the irqdomain. 1405 */ 1406static irq_hw_number_t pci_msi_domain_calc_hwirq(struct msi_desc *desc) 1407{ 1408 struct pci_dev *dev = msi_desc_to_pci_dev(desc); 1409 1410 return (irq_hw_number_t)desc->msi_attrib.entry_nr | 1411 pci_dev_id(dev) << 11 | 1412 (pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 27; 1413} 1414 1415static inline bool pci_msi_desc_is_multi_msi(struct msi_desc *desc) 1416{ 1417 return !desc->msi_attrib.is_msix && desc->nvec_used > 1; 1418} 1419 1420/** 1421 * pci_msi_domain_check_cap - Verify that @domain supports the capabilities 1422 * for @dev 1423 * @domain: The interrupt domain to check 1424 * @info: The domain info for verification 1425 * @dev: The device to check 1426 * 1427 * Returns: 1428 * 0 if the functionality is supported 1429 * 1 if Multi MSI is requested, but the domain does not support it 1430 * -ENOTSUPP otherwise 1431 */ 1432int pci_msi_domain_check_cap(struct irq_domain *domain, 1433 struct msi_domain_info *info, struct device *dev) 1434{ 1435 struct msi_desc *desc = first_pci_msi_entry(to_pci_dev(dev)); 1436 1437 /* Special handling to support __pci_enable_msi_range() */ 1438 if (pci_msi_desc_is_multi_msi(desc) && 1439 !(info->flags & MSI_FLAG_MULTI_PCI_MSI)) 1440 return 1; 1441 else if (desc->msi_attrib.is_msix && !(info->flags & MSI_FLAG_PCI_MSIX)) 1442 return -ENOTSUPP; 1443 1444 return 0; 1445} 1446 1447static int pci_msi_domain_handle_error(struct irq_domain *domain, 1448 struct msi_desc *desc, int error) 1449{ 1450 /* Special handling to support __pci_enable_msi_range() */ 1451 if (pci_msi_desc_is_multi_msi(desc) && error == -ENOSPC) 1452 return 1; 1453 1454 return error; 1455} 1456 1457static void pci_msi_domain_set_desc(msi_alloc_info_t *arg, 1458 struct msi_desc *desc) 1459{ 1460 arg->desc = desc; 1461 arg->hwirq = pci_msi_domain_calc_hwirq(desc); 1462} 1463 1464static struct msi_domain_ops pci_msi_domain_ops_default = { 1465 .set_desc = pci_msi_domain_set_desc, 1466 .msi_check = pci_msi_domain_check_cap, 1467 .handle_error = pci_msi_domain_handle_error, 1468}; 1469 1470static void pci_msi_domain_update_dom_ops(struct msi_domain_info *info) 1471{ 1472 struct msi_domain_ops *ops = info->ops; 1473 1474 if (ops == NULL) { 1475 info->ops = &pci_msi_domain_ops_default; 1476 } else { 1477 if (ops->set_desc == NULL) 1478 ops->set_desc = pci_msi_domain_set_desc; 1479 if (ops->msi_check == NULL) 1480 ops->msi_check = pci_msi_domain_check_cap; 1481 if (ops->handle_error == NULL) 1482 ops->handle_error = pci_msi_domain_handle_error; 1483 } 1484} 1485 1486static void pci_msi_domain_update_chip_ops(struct msi_domain_info *info) 1487{ 1488 struct irq_chip *chip = info->chip; 1489 1490 BUG_ON(!chip); 1491 if (!chip->irq_write_msi_msg) 1492 chip->irq_write_msi_msg = pci_msi_domain_write_msg; 1493 if (!chip->irq_mask) 1494 chip->irq_mask = pci_msi_mask_irq; 1495 if (!chip->irq_unmask) 1496 chip->irq_unmask = pci_msi_unmask_irq; 1497} 1498 1499/** 1500 * pci_msi_create_irq_domain - Create a MSI interrupt domain 1501 * @fwnode: Optional fwnode of the interrupt controller 1502 * @info: MSI domain info 1503 * @parent: Parent irq domain 1504 * 1505 * Updates the domain and chip ops and creates a MSI interrupt domain. 1506 * 1507 * Returns: 1508 * A domain pointer or NULL in case of failure. 1509 */ 1510struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, 1511 struct msi_domain_info *info, 1512 struct irq_domain *parent) 1513{ 1514 struct irq_domain *domain; 1515 1516 if (WARN_ON(info->flags & MSI_FLAG_LEVEL_CAPABLE)) 1517 info->flags &= ~MSI_FLAG_LEVEL_CAPABLE; 1518 1519 if (info->flags & MSI_FLAG_USE_DEF_DOM_OPS) 1520 pci_msi_domain_update_dom_ops(info); 1521 if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS) 1522 pci_msi_domain_update_chip_ops(info); 1523 1524 info->flags |= MSI_FLAG_ACTIVATE_EARLY; 1525 if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE)) 1526 info->flags |= MSI_FLAG_MUST_REACTIVATE; 1527 1528 /* PCI-MSI is oneshot-safe */ 1529 info->chip->flags |= IRQCHIP_ONESHOT_SAFE; 1530 1531 domain = msi_create_irq_domain(fwnode, info, parent); 1532 if (!domain) 1533 return NULL; 1534 1535 irq_domain_update_bus_token(domain, DOMAIN_BUS_PCI_MSI); 1536 return domain; 1537} 1538EXPORT_SYMBOL_GPL(pci_msi_create_irq_domain); 1539 1540/* 1541 * Users of the generic MSI infrastructure expect a device to have a single ID, 1542 * so with DMA aliases we have to pick the least-worst compromise. Devices with 1543 * DMA phantom functions tend to still emit MSIs from the real function number, 1544 * so we ignore those and only consider topological aliases where either the 1545 * alias device or RID appears on a different bus number. We also make the 1546 * reasonable assumption that bridges are walked in an upstream direction (so 1547 * the last one seen wins), and the much braver assumption that the most likely 1548 * case is that of PCI->PCIe so we should always use the alias RID. This echoes 1549 * the logic from intel_irq_remapping's set_msi_sid(), which presumably works 1550 * well enough in practice; in the face of the horrible PCIe<->PCI-X conditions 1551 * for taking ownership all we can really do is close our eyes and hope... 1552 */ 1553static int get_msi_id_cb(struct pci_dev *pdev, u16 alias, void *data) 1554{ 1555 u32 *pa = data; 1556 u8 bus = PCI_BUS_NUM(*pa); 1557 1558 if (pdev->bus->number != bus || PCI_BUS_NUM(alias) != bus) 1559 *pa = alias; 1560 1561 return 0; 1562} 1563 1564/** 1565 * pci_msi_domain_get_msi_rid - Get the MSI requester id (RID) 1566 * @domain: The interrupt domain 1567 * @pdev: The PCI device. 1568 * 1569 * The RID for a device is formed from the alias, with a firmware 1570 * supplied mapping applied 1571 * 1572 * Returns: The RID. 1573 */ 1574u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev) 1575{ 1576 struct device_node *of_node; 1577 u32 rid = pci_dev_id(pdev); 1578 1579 pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid); 1580 1581 of_node = irq_domain_get_of_node(domain); 1582 rid = of_node ? of_msi_map_id(&pdev->dev, of_node, rid) : 1583 iort_msi_map_id(&pdev->dev, rid); 1584 1585 return rid; 1586} 1587 1588/** 1589 * pci_msi_get_device_domain - Get the MSI domain for a given PCI device 1590 * @pdev: The PCI device 1591 * 1592 * Use the firmware data to find a device-specific MSI domain 1593 * (i.e. not one that is set as a default). 1594 * 1595 * Returns: The corresponding MSI domain or NULL if none has been found. 1596 */ 1597struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev) 1598{ 1599 struct irq_domain *dom; 1600 u32 rid = pci_dev_id(pdev); 1601 1602 pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid); 1603 dom = of_msi_map_get_device_domain(&pdev->dev, rid, DOMAIN_BUS_PCI_MSI); 1604 if (!dom) 1605 dom = iort_get_device_domain(&pdev->dev, rid, 1606 DOMAIN_BUS_PCI_MSI); 1607 return dom; 1608} 1609 1610/** 1611 * pci_dev_has_special_msi_domain - Check whether the device is handled by 1612 * a non-standard PCI-MSI domain 1613 * @pdev: The PCI device to check. 1614 * 1615 * Returns: True if the device irqdomain or the bus irqdomain is 1616 * non-standard PCI/MSI. 1617 */ 1618bool pci_dev_has_special_msi_domain(struct pci_dev *pdev) 1619{ 1620 struct irq_domain *dom = dev_get_msi_domain(&pdev->dev); 1621 1622 if (!dom) 1623 dom = dev_get_msi_domain(&pdev->bus->dev); 1624 1625 if (!dom) 1626 return true; 1627 1628 return dom->bus_token != DOMAIN_BUS_PCI_MSI; 1629} 1630 1631#endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */ 1632