18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * Copyright (c) 2005 Cisco Systems. All rights reserved. 38c2ecf20Sopenharmony_ci * 48c2ecf20Sopenharmony_ci * This software is available to you under a choice of one of two 58c2ecf20Sopenharmony_ci * licenses. You may choose to be licensed under the terms of the GNU 68c2ecf20Sopenharmony_ci * General Public License (GPL) Version 2, available from the file 78c2ecf20Sopenharmony_ci * COPYING in the main directory of this source tree, or the 88c2ecf20Sopenharmony_ci * OpenIB.org BSD license below: 98c2ecf20Sopenharmony_ci * 108c2ecf20Sopenharmony_ci * Redistribution and use in source and binary forms, with or 118c2ecf20Sopenharmony_ci * without modification, are permitted provided that the following 128c2ecf20Sopenharmony_ci * conditions are met: 138c2ecf20Sopenharmony_ci * 148c2ecf20Sopenharmony_ci * - Redistributions of source code must retain the above 158c2ecf20Sopenharmony_ci * copyright notice, this list of conditions and the following 168c2ecf20Sopenharmony_ci * disclaimer. 178c2ecf20Sopenharmony_ci * 188c2ecf20Sopenharmony_ci * - Redistributions in binary form must reproduce the above 198c2ecf20Sopenharmony_ci * copyright notice, this list of conditions and the following 208c2ecf20Sopenharmony_ci * disclaimer in the documentation and/or other materials 218c2ecf20Sopenharmony_ci * provided with the distribution. 228c2ecf20Sopenharmony_ci * 238c2ecf20Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 248c2ecf20Sopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 258c2ecf20Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 268c2ecf20Sopenharmony_ci * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 278c2ecf20Sopenharmony_ci * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 288c2ecf20Sopenharmony_ci * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 298c2ecf20Sopenharmony_ci * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 308c2ecf20Sopenharmony_ci * SOFTWARE. 318c2ecf20Sopenharmony_ci */ 328c2ecf20Sopenharmony_ci 338c2ecf20Sopenharmony_ci#include <linux/jiffies.h> 348c2ecf20Sopenharmony_ci#include <linux/module.h> 358c2ecf20Sopenharmony_ci#include <linux/timer.h> 368c2ecf20Sopenharmony_ci#include <linux/workqueue.h> 378c2ecf20Sopenharmony_ci 388c2ecf20Sopenharmony_ci#include "mthca_dev.h" 398c2ecf20Sopenharmony_ci 408c2ecf20Sopenharmony_cienum { 418c2ecf20Sopenharmony_ci MTHCA_CATAS_POLL_INTERVAL = 5 * HZ, 428c2ecf20Sopenharmony_ci 438c2ecf20Sopenharmony_ci MTHCA_CATAS_TYPE_INTERNAL = 0, 448c2ecf20Sopenharmony_ci MTHCA_CATAS_TYPE_UPLINK = 3, 458c2ecf20Sopenharmony_ci MTHCA_CATAS_TYPE_DDR = 4, 468c2ecf20Sopenharmony_ci MTHCA_CATAS_TYPE_PARITY = 5, 478c2ecf20Sopenharmony_ci}; 488c2ecf20Sopenharmony_ci 498c2ecf20Sopenharmony_cistatic DEFINE_SPINLOCK(catas_lock); 508c2ecf20Sopenharmony_ci 518c2ecf20Sopenharmony_cistatic LIST_HEAD(catas_list); 528c2ecf20Sopenharmony_cistatic struct workqueue_struct *catas_wq; 538c2ecf20Sopenharmony_cistatic struct work_struct catas_work; 548c2ecf20Sopenharmony_ci 558c2ecf20Sopenharmony_cistatic int catas_reset_disable; 568c2ecf20Sopenharmony_cimodule_param_named(catas_reset_disable, catas_reset_disable, int, 0644); 578c2ecf20Sopenharmony_ciMODULE_PARM_DESC(catas_reset_disable, "disable reset on catastrophic event if nonzero"); 588c2ecf20Sopenharmony_ci 598c2ecf20Sopenharmony_cistatic void catas_reset(struct work_struct *work) 608c2ecf20Sopenharmony_ci{ 618c2ecf20Sopenharmony_ci struct mthca_dev *dev, *tmpdev; 628c2ecf20Sopenharmony_ci LIST_HEAD(tlist); 638c2ecf20Sopenharmony_ci int ret; 648c2ecf20Sopenharmony_ci 658c2ecf20Sopenharmony_ci mutex_lock(&mthca_device_mutex); 668c2ecf20Sopenharmony_ci 678c2ecf20Sopenharmony_ci spin_lock_irq(&catas_lock); 688c2ecf20Sopenharmony_ci list_splice_init(&catas_list, &tlist); 698c2ecf20Sopenharmony_ci spin_unlock_irq(&catas_lock); 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_ci list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) { 728c2ecf20Sopenharmony_ci struct pci_dev *pdev = dev->pdev; 738c2ecf20Sopenharmony_ci ret = __mthca_restart_one(dev->pdev); 748c2ecf20Sopenharmony_ci /* 'dev' now is not valid */ 758c2ecf20Sopenharmony_ci if (ret) 768c2ecf20Sopenharmony_ci printk(KERN_ERR "mthca %s: Reset failed (%d)\n", 778c2ecf20Sopenharmony_ci pci_name(pdev), ret); 788c2ecf20Sopenharmony_ci else { 798c2ecf20Sopenharmony_ci struct mthca_dev *d = pci_get_drvdata(pdev); 808c2ecf20Sopenharmony_ci mthca_dbg(d, "Reset succeeded\n"); 818c2ecf20Sopenharmony_ci } 828c2ecf20Sopenharmony_ci } 838c2ecf20Sopenharmony_ci 848c2ecf20Sopenharmony_ci mutex_unlock(&mthca_device_mutex); 858c2ecf20Sopenharmony_ci} 868c2ecf20Sopenharmony_ci 878c2ecf20Sopenharmony_cistatic void handle_catas(struct mthca_dev *dev) 888c2ecf20Sopenharmony_ci{ 898c2ecf20Sopenharmony_ci struct ib_event event; 908c2ecf20Sopenharmony_ci unsigned long flags; 918c2ecf20Sopenharmony_ci const char *type; 928c2ecf20Sopenharmony_ci int i; 938c2ecf20Sopenharmony_ci 948c2ecf20Sopenharmony_ci event.device = &dev->ib_dev; 958c2ecf20Sopenharmony_ci event.event = IB_EVENT_DEVICE_FATAL; 968c2ecf20Sopenharmony_ci event.element.port_num = 0; 978c2ecf20Sopenharmony_ci dev->active = false; 988c2ecf20Sopenharmony_ci 998c2ecf20Sopenharmony_ci ib_dispatch_event(&event); 1008c2ecf20Sopenharmony_ci 1018c2ecf20Sopenharmony_ci switch (swab32(readl(dev->catas_err.map)) >> 24) { 1028c2ecf20Sopenharmony_ci case MTHCA_CATAS_TYPE_INTERNAL: 1038c2ecf20Sopenharmony_ci type = "internal error"; 1048c2ecf20Sopenharmony_ci break; 1058c2ecf20Sopenharmony_ci case MTHCA_CATAS_TYPE_UPLINK: 1068c2ecf20Sopenharmony_ci type = "uplink bus error"; 1078c2ecf20Sopenharmony_ci break; 1088c2ecf20Sopenharmony_ci case MTHCA_CATAS_TYPE_DDR: 1098c2ecf20Sopenharmony_ci type = "DDR data error"; 1108c2ecf20Sopenharmony_ci break; 1118c2ecf20Sopenharmony_ci case MTHCA_CATAS_TYPE_PARITY: 1128c2ecf20Sopenharmony_ci type = "internal parity error"; 1138c2ecf20Sopenharmony_ci break; 1148c2ecf20Sopenharmony_ci default: 1158c2ecf20Sopenharmony_ci type = "unknown error"; 1168c2ecf20Sopenharmony_ci break; 1178c2ecf20Sopenharmony_ci } 1188c2ecf20Sopenharmony_ci 1198c2ecf20Sopenharmony_ci mthca_err(dev, "Catastrophic error detected: %s\n", type); 1208c2ecf20Sopenharmony_ci for (i = 0; i < dev->catas_err.size; ++i) 1218c2ecf20Sopenharmony_ci mthca_err(dev, " buf[%02x]: %08x\n", 1228c2ecf20Sopenharmony_ci i, swab32(readl(dev->catas_err.map + i))); 1238c2ecf20Sopenharmony_ci 1248c2ecf20Sopenharmony_ci if (catas_reset_disable) 1258c2ecf20Sopenharmony_ci return; 1268c2ecf20Sopenharmony_ci 1278c2ecf20Sopenharmony_ci spin_lock_irqsave(&catas_lock, flags); 1288c2ecf20Sopenharmony_ci list_add(&dev->catas_err.list, &catas_list); 1298c2ecf20Sopenharmony_ci queue_work(catas_wq, &catas_work); 1308c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&catas_lock, flags); 1318c2ecf20Sopenharmony_ci} 1328c2ecf20Sopenharmony_ci 1338c2ecf20Sopenharmony_cistatic void poll_catas(struct timer_list *t) 1348c2ecf20Sopenharmony_ci{ 1358c2ecf20Sopenharmony_ci struct mthca_dev *dev = from_timer(dev, t, catas_err.timer); 1368c2ecf20Sopenharmony_ci int i; 1378c2ecf20Sopenharmony_ci 1388c2ecf20Sopenharmony_ci for (i = 0; i < dev->catas_err.size; ++i) 1398c2ecf20Sopenharmony_ci if (readl(dev->catas_err.map + i)) { 1408c2ecf20Sopenharmony_ci handle_catas(dev); 1418c2ecf20Sopenharmony_ci return; 1428c2ecf20Sopenharmony_ci } 1438c2ecf20Sopenharmony_ci 1448c2ecf20Sopenharmony_ci mod_timer(&dev->catas_err.timer, 1458c2ecf20Sopenharmony_ci round_jiffies(jiffies + MTHCA_CATAS_POLL_INTERVAL)); 1468c2ecf20Sopenharmony_ci} 1478c2ecf20Sopenharmony_ci 1488c2ecf20Sopenharmony_civoid mthca_start_catas_poll(struct mthca_dev *dev) 1498c2ecf20Sopenharmony_ci{ 1508c2ecf20Sopenharmony_ci phys_addr_t addr; 1518c2ecf20Sopenharmony_ci 1528c2ecf20Sopenharmony_ci timer_setup(&dev->catas_err.timer, poll_catas, 0); 1538c2ecf20Sopenharmony_ci dev->catas_err.map = NULL; 1548c2ecf20Sopenharmony_ci 1558c2ecf20Sopenharmony_ci addr = pci_resource_start(dev->pdev, 0) + 1568c2ecf20Sopenharmony_ci ((pci_resource_len(dev->pdev, 0) - 1) & 1578c2ecf20Sopenharmony_ci dev->catas_err.addr); 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_ci dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4); 1608c2ecf20Sopenharmony_ci if (!dev->catas_err.map) { 1618c2ecf20Sopenharmony_ci mthca_warn(dev, "couldn't map catastrophic error region " 1628c2ecf20Sopenharmony_ci "at 0x%llx/0x%x\n", (unsigned long long) addr, 1638c2ecf20Sopenharmony_ci dev->catas_err.size * 4); 1648c2ecf20Sopenharmony_ci return; 1658c2ecf20Sopenharmony_ci } 1668c2ecf20Sopenharmony_ci 1678c2ecf20Sopenharmony_ci dev->catas_err.timer.expires = jiffies + MTHCA_CATAS_POLL_INTERVAL; 1688c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&dev->catas_err.list); 1698c2ecf20Sopenharmony_ci add_timer(&dev->catas_err.timer); 1708c2ecf20Sopenharmony_ci} 1718c2ecf20Sopenharmony_ci 1728c2ecf20Sopenharmony_civoid mthca_stop_catas_poll(struct mthca_dev *dev) 1738c2ecf20Sopenharmony_ci{ 1748c2ecf20Sopenharmony_ci del_timer_sync(&dev->catas_err.timer); 1758c2ecf20Sopenharmony_ci 1768c2ecf20Sopenharmony_ci if (dev->catas_err.map) 1778c2ecf20Sopenharmony_ci iounmap(dev->catas_err.map); 1788c2ecf20Sopenharmony_ci 1798c2ecf20Sopenharmony_ci spin_lock_irq(&catas_lock); 1808c2ecf20Sopenharmony_ci list_del(&dev->catas_err.list); 1818c2ecf20Sopenharmony_ci spin_unlock_irq(&catas_lock); 1828c2ecf20Sopenharmony_ci} 1838c2ecf20Sopenharmony_ci 1848c2ecf20Sopenharmony_ciint __init mthca_catas_init(void) 1858c2ecf20Sopenharmony_ci{ 1868c2ecf20Sopenharmony_ci INIT_WORK(&catas_work, catas_reset); 1878c2ecf20Sopenharmony_ci 1888c2ecf20Sopenharmony_ci catas_wq = alloc_ordered_workqueue("mthca_catas", WQ_MEM_RECLAIM); 1898c2ecf20Sopenharmony_ci if (!catas_wq) 1908c2ecf20Sopenharmony_ci return -ENOMEM; 1918c2ecf20Sopenharmony_ci 1928c2ecf20Sopenharmony_ci return 0; 1938c2ecf20Sopenharmony_ci} 1948c2ecf20Sopenharmony_ci 1958c2ecf20Sopenharmony_civoid mthca_catas_cleanup(void) 1968c2ecf20Sopenharmony_ci{ 1978c2ecf20Sopenharmony_ci destroy_workqueue(catas_wq); 1988c2ecf20Sopenharmony_ci} 199