162306a36Sopenharmony_ci/* 262306a36Sopenharmony_ci * Copyright (c) 2005 Cisco Systems. All rights reserved. 362306a36Sopenharmony_ci * 462306a36Sopenharmony_ci * This software is available to you under a choice of one of two 562306a36Sopenharmony_ci * licenses. You may choose to be licensed under the terms of the GNU 662306a36Sopenharmony_ci * General Public License (GPL) Version 2, available from the file 762306a36Sopenharmony_ci * COPYING in the main directory of this source tree, or the 862306a36Sopenharmony_ci * OpenIB.org BSD license below: 962306a36Sopenharmony_ci * 1062306a36Sopenharmony_ci * Redistribution and use in source and binary forms, with or 1162306a36Sopenharmony_ci * without modification, are permitted provided that the following 1262306a36Sopenharmony_ci * conditions are met: 1362306a36Sopenharmony_ci * 1462306a36Sopenharmony_ci * - Redistributions of source code must retain the above 1562306a36Sopenharmony_ci * copyright notice, this list of conditions and the following 1662306a36Sopenharmony_ci * disclaimer. 1762306a36Sopenharmony_ci * 1862306a36Sopenharmony_ci * - Redistributions in binary form must reproduce the above 1962306a36Sopenharmony_ci * copyright notice, this list of conditions and the following 2062306a36Sopenharmony_ci * disclaimer in the documentation and/or other materials 2162306a36Sopenharmony_ci * provided with the distribution. 2262306a36Sopenharmony_ci * 2362306a36Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 2462306a36Sopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 2562306a36Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 2662306a36Sopenharmony_ci * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 2762306a36Sopenharmony_ci * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 2862306a36Sopenharmony_ci * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 2962306a36Sopenharmony_ci * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 3062306a36Sopenharmony_ci * SOFTWARE. 3162306a36Sopenharmony_ci */ 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_ci#include <linux/jiffies.h> 3462306a36Sopenharmony_ci#include <linux/module.h> 3562306a36Sopenharmony_ci#include <linux/timer.h> 3662306a36Sopenharmony_ci#include <linux/workqueue.h> 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_ci#include "mthca_dev.h" 3962306a36Sopenharmony_ci 4062306a36Sopenharmony_cienum { 4162306a36Sopenharmony_ci MTHCA_CATAS_POLL_INTERVAL = 5 * HZ, 4262306a36Sopenharmony_ci 4362306a36Sopenharmony_ci MTHCA_CATAS_TYPE_INTERNAL = 0, 4462306a36Sopenharmony_ci MTHCA_CATAS_TYPE_UPLINK = 3, 4562306a36Sopenharmony_ci MTHCA_CATAS_TYPE_DDR = 4, 4662306a36Sopenharmony_ci MTHCA_CATAS_TYPE_PARITY = 5, 4762306a36Sopenharmony_ci}; 4862306a36Sopenharmony_ci 4962306a36Sopenharmony_cistatic DEFINE_SPINLOCK(catas_lock); 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_cistatic LIST_HEAD(catas_list); 5262306a36Sopenharmony_cistatic struct workqueue_struct *catas_wq; 5362306a36Sopenharmony_cistatic struct work_struct catas_work; 5462306a36Sopenharmony_ci 5562306a36Sopenharmony_cistatic int catas_reset_disable; 5662306a36Sopenharmony_cimodule_param_named(catas_reset_disable, catas_reset_disable, int, 0644); 5762306a36Sopenharmony_ciMODULE_PARM_DESC(catas_reset_disable, "disable reset on catastrophic event if nonzero"); 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_cistatic void catas_reset(struct work_struct *work) 6062306a36Sopenharmony_ci{ 6162306a36Sopenharmony_ci struct mthca_dev *dev, *tmpdev; 6262306a36Sopenharmony_ci LIST_HEAD(tlist); 6362306a36Sopenharmony_ci int ret; 6462306a36Sopenharmony_ci 6562306a36Sopenharmony_ci mutex_lock(&mthca_device_mutex); 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_ci spin_lock_irq(&catas_lock); 6862306a36Sopenharmony_ci list_splice_init(&catas_list, &tlist); 6962306a36Sopenharmony_ci spin_unlock_irq(&catas_lock); 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) { 7262306a36Sopenharmony_ci struct pci_dev *pdev = dev->pdev; 7362306a36Sopenharmony_ci ret = __mthca_restart_one(dev->pdev); 7462306a36Sopenharmony_ci /* 'dev' now is not valid */ 7562306a36Sopenharmony_ci if (ret) 7662306a36Sopenharmony_ci printk(KERN_ERR "mthca %s: Reset failed (%d)\n", 7762306a36Sopenharmony_ci pci_name(pdev), ret); 7862306a36Sopenharmony_ci else { 7962306a36Sopenharmony_ci struct mthca_dev *d = pci_get_drvdata(pdev); 8062306a36Sopenharmony_ci mthca_dbg(d, "Reset succeeded\n"); 8162306a36Sopenharmony_ci } 8262306a36Sopenharmony_ci } 8362306a36Sopenharmony_ci 8462306a36Sopenharmony_ci mutex_unlock(&mthca_device_mutex); 8562306a36Sopenharmony_ci} 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_cistatic void handle_catas(struct mthca_dev *dev) 8862306a36Sopenharmony_ci{ 8962306a36Sopenharmony_ci struct ib_event event; 9062306a36Sopenharmony_ci unsigned long flags; 9162306a36Sopenharmony_ci const char *type; 9262306a36Sopenharmony_ci int i; 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci event.device = &dev->ib_dev; 9562306a36Sopenharmony_ci event.event = IB_EVENT_DEVICE_FATAL; 9662306a36Sopenharmony_ci event.element.port_num = 0; 9762306a36Sopenharmony_ci dev->active = false; 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_ci ib_dispatch_event(&event); 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci switch (swab32(readl(dev->catas_err.map)) >> 24) { 10262306a36Sopenharmony_ci case MTHCA_CATAS_TYPE_INTERNAL: 10362306a36Sopenharmony_ci type = "internal error"; 10462306a36Sopenharmony_ci break; 10562306a36Sopenharmony_ci case MTHCA_CATAS_TYPE_UPLINK: 10662306a36Sopenharmony_ci type = "uplink bus error"; 10762306a36Sopenharmony_ci break; 10862306a36Sopenharmony_ci case MTHCA_CATAS_TYPE_DDR: 10962306a36Sopenharmony_ci type = "DDR data error"; 11062306a36Sopenharmony_ci break; 11162306a36Sopenharmony_ci case MTHCA_CATAS_TYPE_PARITY: 11262306a36Sopenharmony_ci type = "internal parity error"; 11362306a36Sopenharmony_ci break; 11462306a36Sopenharmony_ci default: 11562306a36Sopenharmony_ci type = "unknown error"; 11662306a36Sopenharmony_ci break; 11762306a36Sopenharmony_ci } 11862306a36Sopenharmony_ci 11962306a36Sopenharmony_ci mthca_err(dev, "Catastrophic error detected: %s\n", type); 12062306a36Sopenharmony_ci for (i = 0; i < dev->catas_err.size; ++i) 12162306a36Sopenharmony_ci mthca_err(dev, " buf[%02x]: %08x\n", 12262306a36Sopenharmony_ci i, swab32(readl(dev->catas_err.map + i))); 12362306a36Sopenharmony_ci 12462306a36Sopenharmony_ci if (catas_reset_disable) 12562306a36Sopenharmony_ci return; 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci spin_lock_irqsave(&catas_lock, flags); 12862306a36Sopenharmony_ci list_add(&dev->catas_err.list, &catas_list); 12962306a36Sopenharmony_ci queue_work(catas_wq, &catas_work); 13062306a36Sopenharmony_ci spin_unlock_irqrestore(&catas_lock, flags); 13162306a36Sopenharmony_ci} 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_cistatic void poll_catas(struct timer_list *t) 13462306a36Sopenharmony_ci{ 13562306a36Sopenharmony_ci struct mthca_dev *dev = from_timer(dev, t, catas_err.timer); 13662306a36Sopenharmony_ci int i; 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci for (i = 0; i < dev->catas_err.size; ++i) 13962306a36Sopenharmony_ci if (readl(dev->catas_err.map + i)) { 14062306a36Sopenharmony_ci handle_catas(dev); 14162306a36Sopenharmony_ci return; 14262306a36Sopenharmony_ci } 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci mod_timer(&dev->catas_err.timer, 14562306a36Sopenharmony_ci round_jiffies(jiffies + MTHCA_CATAS_POLL_INTERVAL)); 14662306a36Sopenharmony_ci} 14762306a36Sopenharmony_ci 14862306a36Sopenharmony_civoid mthca_start_catas_poll(struct mthca_dev *dev) 14962306a36Sopenharmony_ci{ 15062306a36Sopenharmony_ci phys_addr_t addr; 15162306a36Sopenharmony_ci 15262306a36Sopenharmony_ci timer_setup(&dev->catas_err.timer, poll_catas, 0); 15362306a36Sopenharmony_ci dev->catas_err.map = NULL; 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_ci addr = pci_resource_start(dev->pdev, 0) + 15662306a36Sopenharmony_ci ((pci_resource_len(dev->pdev, 0) - 1) & 15762306a36Sopenharmony_ci dev->catas_err.addr); 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_ci dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4); 16062306a36Sopenharmony_ci if (!dev->catas_err.map) { 16162306a36Sopenharmony_ci mthca_warn(dev, "couldn't map catastrophic error region " 16262306a36Sopenharmony_ci "at 0x%llx/0x%x\n", (unsigned long long) addr, 16362306a36Sopenharmony_ci dev->catas_err.size * 4); 16462306a36Sopenharmony_ci return; 16562306a36Sopenharmony_ci } 16662306a36Sopenharmony_ci 16762306a36Sopenharmony_ci dev->catas_err.timer.expires = jiffies + MTHCA_CATAS_POLL_INTERVAL; 16862306a36Sopenharmony_ci INIT_LIST_HEAD(&dev->catas_err.list); 16962306a36Sopenharmony_ci add_timer(&dev->catas_err.timer); 17062306a36Sopenharmony_ci} 17162306a36Sopenharmony_ci 17262306a36Sopenharmony_civoid mthca_stop_catas_poll(struct mthca_dev *dev) 17362306a36Sopenharmony_ci{ 17462306a36Sopenharmony_ci del_timer_sync(&dev->catas_err.timer); 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci if (dev->catas_err.map) 17762306a36Sopenharmony_ci iounmap(dev->catas_err.map); 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_ci spin_lock_irq(&catas_lock); 18062306a36Sopenharmony_ci list_del(&dev->catas_err.list); 18162306a36Sopenharmony_ci spin_unlock_irq(&catas_lock); 18262306a36Sopenharmony_ci} 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_ciint __init mthca_catas_init(void) 18562306a36Sopenharmony_ci{ 18662306a36Sopenharmony_ci INIT_WORK(&catas_work, catas_reset); 18762306a36Sopenharmony_ci 18862306a36Sopenharmony_ci catas_wq = alloc_ordered_workqueue("mthca_catas", WQ_MEM_RECLAIM); 18962306a36Sopenharmony_ci if (!catas_wq) 19062306a36Sopenharmony_ci return -ENOMEM; 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_ci return 0; 19362306a36Sopenharmony_ci} 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_civoid mthca_catas_cleanup(void) 19662306a36Sopenharmony_ci{ 19762306a36Sopenharmony_ci destroy_workqueue(catas_wq); 19862306a36Sopenharmony_ci} 199