18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci * Copyright (c) 2005 Cisco Systems.  All rights reserved.
38c2ecf20Sopenharmony_ci *
48c2ecf20Sopenharmony_ci * This software is available to you under a choice of one of two
58c2ecf20Sopenharmony_ci * licenses.  You may choose to be licensed under the terms of the GNU
68c2ecf20Sopenharmony_ci * General Public License (GPL) Version 2, available from the file
78c2ecf20Sopenharmony_ci * COPYING in the main directory of this source tree, or the
88c2ecf20Sopenharmony_ci * OpenIB.org BSD license below:
98c2ecf20Sopenharmony_ci *
108c2ecf20Sopenharmony_ci *     Redistribution and use in source and binary forms, with or
118c2ecf20Sopenharmony_ci *     without modification, are permitted provided that the following
128c2ecf20Sopenharmony_ci *     conditions are met:
138c2ecf20Sopenharmony_ci *
148c2ecf20Sopenharmony_ci *      - Redistributions of source code must retain the above
158c2ecf20Sopenharmony_ci *        copyright notice, this list of conditions and the following
168c2ecf20Sopenharmony_ci *        disclaimer.
178c2ecf20Sopenharmony_ci *
188c2ecf20Sopenharmony_ci *      - Redistributions in binary form must reproduce the above
198c2ecf20Sopenharmony_ci *        copyright notice, this list of conditions and the following
208c2ecf20Sopenharmony_ci *        disclaimer in the documentation and/or other materials
218c2ecf20Sopenharmony_ci *        provided with the distribution.
228c2ecf20Sopenharmony_ci *
238c2ecf20Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
248c2ecf20Sopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
258c2ecf20Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
268c2ecf20Sopenharmony_ci * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
278c2ecf20Sopenharmony_ci * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
288c2ecf20Sopenharmony_ci * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
298c2ecf20Sopenharmony_ci * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
308c2ecf20Sopenharmony_ci * SOFTWARE.
318c2ecf20Sopenharmony_ci */
328c2ecf20Sopenharmony_ci
338c2ecf20Sopenharmony_ci#include <linux/jiffies.h>
348c2ecf20Sopenharmony_ci#include <linux/module.h>
358c2ecf20Sopenharmony_ci#include <linux/timer.h>
368c2ecf20Sopenharmony_ci#include <linux/workqueue.h>
378c2ecf20Sopenharmony_ci
388c2ecf20Sopenharmony_ci#include "mthca_dev.h"
398c2ecf20Sopenharmony_ci
408c2ecf20Sopenharmony_cienum {
418c2ecf20Sopenharmony_ci	MTHCA_CATAS_POLL_INTERVAL	= 5 * HZ,
428c2ecf20Sopenharmony_ci
438c2ecf20Sopenharmony_ci	MTHCA_CATAS_TYPE_INTERNAL	= 0,
448c2ecf20Sopenharmony_ci	MTHCA_CATAS_TYPE_UPLINK		= 3,
458c2ecf20Sopenharmony_ci	MTHCA_CATAS_TYPE_DDR		= 4,
468c2ecf20Sopenharmony_ci	MTHCA_CATAS_TYPE_PARITY		= 5,
478c2ecf20Sopenharmony_ci};
488c2ecf20Sopenharmony_ci
498c2ecf20Sopenharmony_cistatic DEFINE_SPINLOCK(catas_lock);
508c2ecf20Sopenharmony_ci
518c2ecf20Sopenharmony_cistatic LIST_HEAD(catas_list);
528c2ecf20Sopenharmony_cistatic struct workqueue_struct *catas_wq;
538c2ecf20Sopenharmony_cistatic struct work_struct catas_work;
548c2ecf20Sopenharmony_ci
558c2ecf20Sopenharmony_cistatic int catas_reset_disable;
568c2ecf20Sopenharmony_cimodule_param_named(catas_reset_disable, catas_reset_disable, int, 0644);
578c2ecf20Sopenharmony_ciMODULE_PARM_DESC(catas_reset_disable, "disable reset on catastrophic event if nonzero");
588c2ecf20Sopenharmony_ci
598c2ecf20Sopenharmony_cistatic void catas_reset(struct work_struct *work)
608c2ecf20Sopenharmony_ci{
618c2ecf20Sopenharmony_ci	struct mthca_dev *dev, *tmpdev;
628c2ecf20Sopenharmony_ci	LIST_HEAD(tlist);
638c2ecf20Sopenharmony_ci	int ret;
648c2ecf20Sopenharmony_ci
658c2ecf20Sopenharmony_ci	mutex_lock(&mthca_device_mutex);
668c2ecf20Sopenharmony_ci
678c2ecf20Sopenharmony_ci	spin_lock_irq(&catas_lock);
688c2ecf20Sopenharmony_ci	list_splice_init(&catas_list, &tlist);
698c2ecf20Sopenharmony_ci	spin_unlock_irq(&catas_lock);
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_ci	list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) {
728c2ecf20Sopenharmony_ci		struct pci_dev *pdev = dev->pdev;
738c2ecf20Sopenharmony_ci		ret = __mthca_restart_one(dev->pdev);
748c2ecf20Sopenharmony_ci		/* 'dev' now is not valid */
758c2ecf20Sopenharmony_ci		if (ret)
768c2ecf20Sopenharmony_ci			printk(KERN_ERR "mthca %s: Reset failed (%d)\n",
778c2ecf20Sopenharmony_ci			       pci_name(pdev), ret);
788c2ecf20Sopenharmony_ci		else {
798c2ecf20Sopenharmony_ci			struct mthca_dev *d = pci_get_drvdata(pdev);
808c2ecf20Sopenharmony_ci			mthca_dbg(d, "Reset succeeded\n");
818c2ecf20Sopenharmony_ci		}
828c2ecf20Sopenharmony_ci	}
838c2ecf20Sopenharmony_ci
848c2ecf20Sopenharmony_ci	mutex_unlock(&mthca_device_mutex);
858c2ecf20Sopenharmony_ci}
868c2ecf20Sopenharmony_ci
878c2ecf20Sopenharmony_cistatic void handle_catas(struct mthca_dev *dev)
888c2ecf20Sopenharmony_ci{
898c2ecf20Sopenharmony_ci	struct ib_event event;
908c2ecf20Sopenharmony_ci	unsigned long flags;
918c2ecf20Sopenharmony_ci	const char *type;
928c2ecf20Sopenharmony_ci	int i;
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_ci	event.device = &dev->ib_dev;
958c2ecf20Sopenharmony_ci	event.event  = IB_EVENT_DEVICE_FATAL;
968c2ecf20Sopenharmony_ci	event.element.port_num = 0;
978c2ecf20Sopenharmony_ci	dev->active = false;
988c2ecf20Sopenharmony_ci
998c2ecf20Sopenharmony_ci	ib_dispatch_event(&event);
1008c2ecf20Sopenharmony_ci
1018c2ecf20Sopenharmony_ci	switch (swab32(readl(dev->catas_err.map)) >> 24) {
1028c2ecf20Sopenharmony_ci	case MTHCA_CATAS_TYPE_INTERNAL:
1038c2ecf20Sopenharmony_ci		type = "internal error";
1048c2ecf20Sopenharmony_ci		break;
1058c2ecf20Sopenharmony_ci	case MTHCA_CATAS_TYPE_UPLINK:
1068c2ecf20Sopenharmony_ci		type = "uplink bus error";
1078c2ecf20Sopenharmony_ci		break;
1088c2ecf20Sopenharmony_ci	case MTHCA_CATAS_TYPE_DDR:
1098c2ecf20Sopenharmony_ci		type = "DDR data error";
1108c2ecf20Sopenharmony_ci		break;
1118c2ecf20Sopenharmony_ci	case MTHCA_CATAS_TYPE_PARITY:
1128c2ecf20Sopenharmony_ci		type = "internal parity error";
1138c2ecf20Sopenharmony_ci		break;
1148c2ecf20Sopenharmony_ci	default:
1158c2ecf20Sopenharmony_ci		type = "unknown error";
1168c2ecf20Sopenharmony_ci		break;
1178c2ecf20Sopenharmony_ci	}
1188c2ecf20Sopenharmony_ci
1198c2ecf20Sopenharmony_ci	mthca_err(dev, "Catastrophic error detected: %s\n", type);
1208c2ecf20Sopenharmony_ci	for (i = 0; i < dev->catas_err.size; ++i)
1218c2ecf20Sopenharmony_ci		mthca_err(dev, "  buf[%02x]: %08x\n",
1228c2ecf20Sopenharmony_ci			  i, swab32(readl(dev->catas_err.map + i)));
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_ci	if (catas_reset_disable)
1258c2ecf20Sopenharmony_ci		return;
1268c2ecf20Sopenharmony_ci
1278c2ecf20Sopenharmony_ci	spin_lock_irqsave(&catas_lock, flags);
1288c2ecf20Sopenharmony_ci	list_add(&dev->catas_err.list, &catas_list);
1298c2ecf20Sopenharmony_ci	queue_work(catas_wq, &catas_work);
1308c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&catas_lock, flags);
1318c2ecf20Sopenharmony_ci}
1328c2ecf20Sopenharmony_ci
1338c2ecf20Sopenharmony_cistatic void poll_catas(struct timer_list *t)
1348c2ecf20Sopenharmony_ci{
1358c2ecf20Sopenharmony_ci	struct mthca_dev *dev = from_timer(dev, t, catas_err.timer);
1368c2ecf20Sopenharmony_ci	int i;
1378c2ecf20Sopenharmony_ci
1388c2ecf20Sopenharmony_ci	for (i = 0; i < dev->catas_err.size; ++i)
1398c2ecf20Sopenharmony_ci		if (readl(dev->catas_err.map + i)) {
1408c2ecf20Sopenharmony_ci			handle_catas(dev);
1418c2ecf20Sopenharmony_ci			return;
1428c2ecf20Sopenharmony_ci		}
1438c2ecf20Sopenharmony_ci
1448c2ecf20Sopenharmony_ci	mod_timer(&dev->catas_err.timer,
1458c2ecf20Sopenharmony_ci		  round_jiffies(jiffies + MTHCA_CATAS_POLL_INTERVAL));
1468c2ecf20Sopenharmony_ci}
1478c2ecf20Sopenharmony_ci
1488c2ecf20Sopenharmony_civoid mthca_start_catas_poll(struct mthca_dev *dev)
1498c2ecf20Sopenharmony_ci{
1508c2ecf20Sopenharmony_ci	phys_addr_t addr;
1518c2ecf20Sopenharmony_ci
1528c2ecf20Sopenharmony_ci	timer_setup(&dev->catas_err.timer, poll_catas, 0);
1538c2ecf20Sopenharmony_ci	dev->catas_err.map  = NULL;
1548c2ecf20Sopenharmony_ci
1558c2ecf20Sopenharmony_ci	addr = pci_resource_start(dev->pdev, 0) +
1568c2ecf20Sopenharmony_ci		((pci_resource_len(dev->pdev, 0) - 1) &
1578c2ecf20Sopenharmony_ci		 dev->catas_err.addr);
1588c2ecf20Sopenharmony_ci
1598c2ecf20Sopenharmony_ci	dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4);
1608c2ecf20Sopenharmony_ci	if (!dev->catas_err.map) {
1618c2ecf20Sopenharmony_ci		mthca_warn(dev, "couldn't map catastrophic error region "
1628c2ecf20Sopenharmony_ci			   "at 0x%llx/0x%x\n", (unsigned long long) addr,
1638c2ecf20Sopenharmony_ci			   dev->catas_err.size * 4);
1648c2ecf20Sopenharmony_ci		return;
1658c2ecf20Sopenharmony_ci	}
1668c2ecf20Sopenharmony_ci
1678c2ecf20Sopenharmony_ci	dev->catas_err.timer.expires  = jiffies + MTHCA_CATAS_POLL_INTERVAL;
1688c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&dev->catas_err.list);
1698c2ecf20Sopenharmony_ci	add_timer(&dev->catas_err.timer);
1708c2ecf20Sopenharmony_ci}
1718c2ecf20Sopenharmony_ci
1728c2ecf20Sopenharmony_civoid mthca_stop_catas_poll(struct mthca_dev *dev)
1738c2ecf20Sopenharmony_ci{
1748c2ecf20Sopenharmony_ci	del_timer_sync(&dev->catas_err.timer);
1758c2ecf20Sopenharmony_ci
1768c2ecf20Sopenharmony_ci	if (dev->catas_err.map)
1778c2ecf20Sopenharmony_ci		iounmap(dev->catas_err.map);
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_ci	spin_lock_irq(&catas_lock);
1808c2ecf20Sopenharmony_ci	list_del(&dev->catas_err.list);
1818c2ecf20Sopenharmony_ci	spin_unlock_irq(&catas_lock);
1828c2ecf20Sopenharmony_ci}
1838c2ecf20Sopenharmony_ci
1848c2ecf20Sopenharmony_ciint __init mthca_catas_init(void)
1858c2ecf20Sopenharmony_ci{
1868c2ecf20Sopenharmony_ci	INIT_WORK(&catas_work, catas_reset);
1878c2ecf20Sopenharmony_ci
1888c2ecf20Sopenharmony_ci	catas_wq = alloc_ordered_workqueue("mthca_catas", WQ_MEM_RECLAIM);
1898c2ecf20Sopenharmony_ci	if (!catas_wq)
1908c2ecf20Sopenharmony_ci		return -ENOMEM;
1918c2ecf20Sopenharmony_ci
1928c2ecf20Sopenharmony_ci	return 0;
1938c2ecf20Sopenharmony_ci}
1948c2ecf20Sopenharmony_ci
1958c2ecf20Sopenharmony_civoid mthca_catas_cleanup(void)
1968c2ecf20Sopenharmony_ci{
1978c2ecf20Sopenharmony_ci	destroy_workqueue(catas_wq);
1988c2ecf20Sopenharmony_ci}
199