18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci 38c2ecf20Sopenharmony_ci/* 48c2ecf20Sopenharmony_ci * Copyright 2016-2019 HabanaLabs, Ltd. 58c2ecf20Sopenharmony_ci * All Rights Reserved. 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci */ 88c2ecf20Sopenharmony_ci 98c2ecf20Sopenharmony_ci#define pr_fmt(fmt) "habanalabs: " fmt 108c2ecf20Sopenharmony_ci 118c2ecf20Sopenharmony_ci#include "habanalabs.h" 128c2ecf20Sopenharmony_ci 138c2ecf20Sopenharmony_ci#include <linux/pci.h> 148c2ecf20Sopenharmony_ci#include <linux/aer.h> 158c2ecf20Sopenharmony_ci#include <linux/module.h> 168c2ecf20Sopenharmony_ci 178c2ecf20Sopenharmony_ci#define HL_DRIVER_AUTHOR "HabanaLabs Kernel Driver Team" 188c2ecf20Sopenharmony_ci 198c2ecf20Sopenharmony_ci#define HL_DRIVER_DESC "Driver for HabanaLabs's AI Accelerators" 208c2ecf20Sopenharmony_ci 218c2ecf20Sopenharmony_ciMODULE_AUTHOR(HL_DRIVER_AUTHOR); 228c2ecf20Sopenharmony_ciMODULE_DESCRIPTION(HL_DRIVER_DESC); 238c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL v2"); 248c2ecf20Sopenharmony_ci 258c2ecf20Sopenharmony_cistatic int hl_major; 268c2ecf20Sopenharmony_cistatic struct class *hl_class; 278c2ecf20Sopenharmony_cistatic DEFINE_IDR(hl_devs_idr); 288c2ecf20Sopenharmony_cistatic DEFINE_MUTEX(hl_devs_idr_lock); 298c2ecf20Sopenharmony_ci 308c2ecf20Sopenharmony_cistatic int timeout_locked = 5; 318c2ecf20Sopenharmony_cistatic int reset_on_lockup = 1; 328c2ecf20Sopenharmony_ci 338c2ecf20Sopenharmony_cimodule_param(timeout_locked, int, 0444); 348c2ecf20Sopenharmony_ciMODULE_PARM_DESC(timeout_locked, 358c2ecf20Sopenharmony_ci "Device lockup timeout in seconds (0 = disabled, default 5s)"); 368c2ecf20Sopenharmony_ci 378c2ecf20Sopenharmony_cimodule_param(reset_on_lockup, int, 0444); 388c2ecf20Sopenharmony_ciMODULE_PARM_DESC(reset_on_lockup, 398c2ecf20Sopenharmony_ci "Do device reset on lockup (0 = no, 1 = yes, default yes)"); 408c2ecf20Sopenharmony_ci 418c2ecf20Sopenharmony_ci#define PCI_VENDOR_ID_HABANALABS 0x1da3 428c2ecf20Sopenharmony_ci 438c2ecf20Sopenharmony_ci#define PCI_IDS_GOYA 0x0001 448c2ecf20Sopenharmony_ci#define PCI_IDS_GAUDI 0x1000 458c2ecf20Sopenharmony_ci 468c2ecf20Sopenharmony_cistatic const struct pci_device_id ids[] = { 478c2ecf20Sopenharmony_ci { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), }, 488c2ecf20Sopenharmony_ci { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), }, 498c2ecf20Sopenharmony_ci { 0, } 508c2ecf20Sopenharmony_ci}; 518c2ecf20Sopenharmony_ciMODULE_DEVICE_TABLE(pci, ids); 528c2ecf20Sopenharmony_ci 538c2ecf20Sopenharmony_ci/* 548c2ecf20Sopenharmony_ci * get_asic_type - translate device id to asic type 558c2ecf20Sopenharmony_ci * 568c2ecf20Sopenharmony_ci * @device: id of the PCI device 578c2ecf20Sopenharmony_ci * 588c2ecf20Sopenharmony_ci * Translate device id to asic type. 598c2ecf20Sopenharmony_ci * In case of unidentified device, return -1 608c2ecf20Sopenharmony_ci */ 618c2ecf20Sopenharmony_cistatic enum hl_asic_type get_asic_type(u16 device) 628c2ecf20Sopenharmony_ci{ 638c2ecf20Sopenharmony_ci enum hl_asic_type asic_type; 648c2ecf20Sopenharmony_ci 658c2ecf20Sopenharmony_ci switch (device) { 668c2ecf20Sopenharmony_ci case PCI_IDS_GOYA: 678c2ecf20Sopenharmony_ci asic_type = ASIC_GOYA; 688c2ecf20Sopenharmony_ci break; 698c2ecf20Sopenharmony_ci case PCI_IDS_GAUDI: 708c2ecf20Sopenharmony_ci asic_type = ASIC_GAUDI; 718c2ecf20Sopenharmony_ci break; 728c2ecf20Sopenharmony_ci default: 738c2ecf20Sopenharmony_ci asic_type = ASIC_INVALID; 748c2ecf20Sopenharmony_ci break; 758c2ecf20Sopenharmony_ci } 768c2ecf20Sopenharmony_ci 778c2ecf20Sopenharmony_ci return asic_type; 788c2ecf20Sopenharmony_ci} 798c2ecf20Sopenharmony_ci 808c2ecf20Sopenharmony_ci/* 818c2ecf20Sopenharmony_ci * hl_device_open - open function for habanalabs device 828c2ecf20Sopenharmony_ci * 838c2ecf20Sopenharmony_ci * @inode: pointer to inode structure 848c2ecf20Sopenharmony_ci * @filp: pointer to file structure 858c2ecf20Sopenharmony_ci * 868c2ecf20Sopenharmony_ci * Called when process opens an habanalabs device. 878c2ecf20Sopenharmony_ci */ 888c2ecf20Sopenharmony_ciint hl_device_open(struct inode *inode, struct file *filp) 898c2ecf20Sopenharmony_ci{ 908c2ecf20Sopenharmony_ci struct hl_device *hdev; 918c2ecf20Sopenharmony_ci struct hl_fpriv *hpriv; 928c2ecf20Sopenharmony_ci int rc; 938c2ecf20Sopenharmony_ci 948c2ecf20Sopenharmony_ci mutex_lock(&hl_devs_idr_lock); 958c2ecf20Sopenharmony_ci hdev = idr_find(&hl_devs_idr, iminor(inode)); 968c2ecf20Sopenharmony_ci mutex_unlock(&hl_devs_idr_lock); 978c2ecf20Sopenharmony_ci 988c2ecf20Sopenharmony_ci if (!hdev) { 998c2ecf20Sopenharmony_ci pr_err("Couldn't find device %d:%d\n", 1008c2ecf20Sopenharmony_ci imajor(inode), iminor(inode)); 1018c2ecf20Sopenharmony_ci return -ENXIO; 1028c2ecf20Sopenharmony_ci } 1038c2ecf20Sopenharmony_ci 1048c2ecf20Sopenharmony_ci hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL); 1058c2ecf20Sopenharmony_ci if (!hpriv) 1068c2ecf20Sopenharmony_ci return -ENOMEM; 1078c2ecf20Sopenharmony_ci 1088c2ecf20Sopenharmony_ci hpriv->hdev = hdev; 1098c2ecf20Sopenharmony_ci filp->private_data = hpriv; 1108c2ecf20Sopenharmony_ci hpriv->filp = filp; 1118c2ecf20Sopenharmony_ci mutex_init(&hpriv->restore_phase_mutex); 1128c2ecf20Sopenharmony_ci kref_init(&hpriv->refcount); 1138c2ecf20Sopenharmony_ci nonseekable_open(inode, filp); 1148c2ecf20Sopenharmony_ci 1158c2ecf20Sopenharmony_ci hl_cb_mgr_init(&hpriv->cb_mgr); 1168c2ecf20Sopenharmony_ci hl_ctx_mgr_init(&hpriv->ctx_mgr); 1178c2ecf20Sopenharmony_ci 1188c2ecf20Sopenharmony_ci hpriv->taskpid = find_get_pid(current->pid); 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci mutex_lock(&hdev->fpriv_list_lock); 1218c2ecf20Sopenharmony_ci 1228c2ecf20Sopenharmony_ci if (hl_device_disabled_or_in_reset(hdev)) { 1238c2ecf20Sopenharmony_ci dev_err_ratelimited(hdev->dev, 1248c2ecf20Sopenharmony_ci "Can't open %s because it is disabled or in reset\n", 1258c2ecf20Sopenharmony_ci dev_name(hdev->dev)); 1268c2ecf20Sopenharmony_ci rc = -EPERM; 1278c2ecf20Sopenharmony_ci goto out_err; 1288c2ecf20Sopenharmony_ci } 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_ci if (hdev->in_debug) { 1318c2ecf20Sopenharmony_ci dev_err_ratelimited(hdev->dev, 1328c2ecf20Sopenharmony_ci "Can't open %s because it is being debugged by another user\n", 1338c2ecf20Sopenharmony_ci dev_name(hdev->dev)); 1348c2ecf20Sopenharmony_ci rc = -EPERM; 1358c2ecf20Sopenharmony_ci goto out_err; 1368c2ecf20Sopenharmony_ci } 1378c2ecf20Sopenharmony_ci 1388c2ecf20Sopenharmony_ci if (hdev->compute_ctx) { 1398c2ecf20Sopenharmony_ci dev_dbg_ratelimited(hdev->dev, 1408c2ecf20Sopenharmony_ci "Can't open %s because another user is working on it\n", 1418c2ecf20Sopenharmony_ci dev_name(hdev->dev)); 1428c2ecf20Sopenharmony_ci rc = -EBUSY; 1438c2ecf20Sopenharmony_ci goto out_err; 1448c2ecf20Sopenharmony_ci } 1458c2ecf20Sopenharmony_ci 1468c2ecf20Sopenharmony_ci rc = hl_ctx_create(hdev, hpriv); 1478c2ecf20Sopenharmony_ci if (rc) { 1488c2ecf20Sopenharmony_ci dev_err(hdev->dev, "Failed to create context %d\n", rc); 1498c2ecf20Sopenharmony_ci goto out_err; 1508c2ecf20Sopenharmony_ci } 1518c2ecf20Sopenharmony_ci 1528c2ecf20Sopenharmony_ci /* Device is IDLE at this point so it is legal to change PLLs. 1538c2ecf20Sopenharmony_ci * There is no need to check anything because if the PLL is 1548c2ecf20Sopenharmony_ci * already HIGH, the set function will return without doing 1558c2ecf20Sopenharmony_ci * anything 1568c2ecf20Sopenharmony_ci */ 1578c2ecf20Sopenharmony_ci hl_device_set_frequency(hdev, PLL_HIGH); 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_ci list_add(&hpriv->dev_node, &hdev->fpriv_list); 1608c2ecf20Sopenharmony_ci mutex_unlock(&hdev->fpriv_list_lock); 1618c2ecf20Sopenharmony_ci 1628c2ecf20Sopenharmony_ci hl_debugfs_add_file(hpriv); 1638c2ecf20Sopenharmony_ci 1648c2ecf20Sopenharmony_ci return 0; 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_ciout_err: 1678c2ecf20Sopenharmony_ci mutex_unlock(&hdev->fpriv_list_lock); 1688c2ecf20Sopenharmony_ci 1698c2ecf20Sopenharmony_ci hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr); 1708c2ecf20Sopenharmony_ci hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); 1718c2ecf20Sopenharmony_ci filp->private_data = NULL; 1728c2ecf20Sopenharmony_ci mutex_destroy(&hpriv->restore_phase_mutex); 1738c2ecf20Sopenharmony_ci put_pid(hpriv->taskpid); 1748c2ecf20Sopenharmony_ci 1758c2ecf20Sopenharmony_ci kfree(hpriv); 1768c2ecf20Sopenharmony_ci 1778c2ecf20Sopenharmony_ci return rc; 1788c2ecf20Sopenharmony_ci} 1798c2ecf20Sopenharmony_ci 1808c2ecf20Sopenharmony_ciint hl_device_open_ctrl(struct inode *inode, struct file *filp) 1818c2ecf20Sopenharmony_ci{ 1828c2ecf20Sopenharmony_ci struct hl_device *hdev; 1838c2ecf20Sopenharmony_ci struct hl_fpriv *hpriv; 1848c2ecf20Sopenharmony_ci int rc; 1858c2ecf20Sopenharmony_ci 1868c2ecf20Sopenharmony_ci mutex_lock(&hl_devs_idr_lock); 1878c2ecf20Sopenharmony_ci hdev = idr_find(&hl_devs_idr, iminor(inode)); 1888c2ecf20Sopenharmony_ci mutex_unlock(&hl_devs_idr_lock); 1898c2ecf20Sopenharmony_ci 1908c2ecf20Sopenharmony_ci if (!hdev) { 1918c2ecf20Sopenharmony_ci pr_err("Couldn't find device %d:%d\n", 1928c2ecf20Sopenharmony_ci imajor(inode), iminor(inode)); 1938c2ecf20Sopenharmony_ci return -ENXIO; 1948c2ecf20Sopenharmony_ci } 1958c2ecf20Sopenharmony_ci 1968c2ecf20Sopenharmony_ci hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL); 1978c2ecf20Sopenharmony_ci if (!hpriv) 1988c2ecf20Sopenharmony_ci return -ENOMEM; 1998c2ecf20Sopenharmony_ci 2008c2ecf20Sopenharmony_ci mutex_lock(&hdev->fpriv_list_lock); 2018c2ecf20Sopenharmony_ci 2028c2ecf20Sopenharmony_ci if (hl_device_disabled_or_in_reset(hdev)) { 2038c2ecf20Sopenharmony_ci dev_err_ratelimited(hdev->dev_ctrl, 2048c2ecf20Sopenharmony_ci "Can't open %s because it is disabled or in reset\n", 2058c2ecf20Sopenharmony_ci dev_name(hdev->dev_ctrl)); 2068c2ecf20Sopenharmony_ci rc = -EPERM; 2078c2ecf20Sopenharmony_ci goto out_err; 2088c2ecf20Sopenharmony_ci } 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_ci list_add(&hpriv->dev_node, &hdev->fpriv_list); 2118c2ecf20Sopenharmony_ci mutex_unlock(&hdev->fpriv_list_lock); 2128c2ecf20Sopenharmony_ci 2138c2ecf20Sopenharmony_ci hpriv->hdev = hdev; 2148c2ecf20Sopenharmony_ci filp->private_data = hpriv; 2158c2ecf20Sopenharmony_ci hpriv->filp = filp; 2168c2ecf20Sopenharmony_ci hpriv->is_control = true; 2178c2ecf20Sopenharmony_ci nonseekable_open(inode, filp); 2188c2ecf20Sopenharmony_ci 2198c2ecf20Sopenharmony_ci hpriv->taskpid = find_get_pid(current->pid); 2208c2ecf20Sopenharmony_ci 2218c2ecf20Sopenharmony_ci return 0; 2228c2ecf20Sopenharmony_ci 2238c2ecf20Sopenharmony_ciout_err: 2248c2ecf20Sopenharmony_ci mutex_unlock(&hdev->fpriv_list_lock); 2258c2ecf20Sopenharmony_ci kfree(hpriv); 2268c2ecf20Sopenharmony_ci return rc; 2278c2ecf20Sopenharmony_ci} 2288c2ecf20Sopenharmony_ci 2298c2ecf20Sopenharmony_cistatic void set_driver_behavior_per_device(struct hl_device *hdev) 2308c2ecf20Sopenharmony_ci{ 2318c2ecf20Sopenharmony_ci hdev->mmu_enable = 1; 2328c2ecf20Sopenharmony_ci hdev->cpu_enable = 1; 2338c2ecf20Sopenharmony_ci hdev->fw_loading = 1; 2348c2ecf20Sopenharmony_ci hdev->cpu_queues_enable = 1; 2358c2ecf20Sopenharmony_ci hdev->heartbeat = 1; 2368c2ecf20Sopenharmony_ci hdev->clock_gating_mask = ULONG_MAX; 2378c2ecf20Sopenharmony_ci 2388c2ecf20Sopenharmony_ci hdev->reset_pcilink = 0; 2398c2ecf20Sopenharmony_ci hdev->axi_drain = 0; 2408c2ecf20Sopenharmony_ci hdev->sram_scrambler_enable = 1; 2418c2ecf20Sopenharmony_ci hdev->dram_scrambler_enable = 1; 2428c2ecf20Sopenharmony_ci hdev->bmc_enable = 1; 2438c2ecf20Sopenharmony_ci hdev->hard_reset_on_fw_events = 1; 2448c2ecf20Sopenharmony_ci} 2458c2ecf20Sopenharmony_ci 2468c2ecf20Sopenharmony_ci/* 2478c2ecf20Sopenharmony_ci * create_hdev - create habanalabs device instance 2488c2ecf20Sopenharmony_ci * 2498c2ecf20Sopenharmony_ci * @dev: will hold the pointer to the new habanalabs device structure 2508c2ecf20Sopenharmony_ci * @pdev: pointer to the pci device 2518c2ecf20Sopenharmony_ci * @asic_type: in case of simulator device, which device is it 2528c2ecf20Sopenharmony_ci * @minor: in case of simulator device, the minor of the device 2538c2ecf20Sopenharmony_ci * 2548c2ecf20Sopenharmony_ci * Allocate memory for habanalabs device and initialize basic fields 2558c2ecf20Sopenharmony_ci * Identify the ASIC type 2568c2ecf20Sopenharmony_ci * Allocate ID (minor) for the device (only for real devices) 2578c2ecf20Sopenharmony_ci */ 2588c2ecf20Sopenharmony_ciint create_hdev(struct hl_device **dev, struct pci_dev *pdev, 2598c2ecf20Sopenharmony_ci enum hl_asic_type asic_type, int minor) 2608c2ecf20Sopenharmony_ci{ 2618c2ecf20Sopenharmony_ci struct hl_device *hdev; 2628c2ecf20Sopenharmony_ci int rc, main_id, ctrl_id = 0; 2638c2ecf20Sopenharmony_ci 2648c2ecf20Sopenharmony_ci *dev = NULL; 2658c2ecf20Sopenharmony_ci 2668c2ecf20Sopenharmony_ci hdev = kzalloc(sizeof(*hdev), GFP_KERNEL); 2678c2ecf20Sopenharmony_ci if (!hdev) 2688c2ecf20Sopenharmony_ci return -ENOMEM; 2698c2ecf20Sopenharmony_ci 2708c2ecf20Sopenharmony_ci /* First, we must find out which ASIC are we handling. This is needed 2718c2ecf20Sopenharmony_ci * to configure the behavior of the driver (kernel parameters) 2728c2ecf20Sopenharmony_ci */ 2738c2ecf20Sopenharmony_ci if (pdev) { 2748c2ecf20Sopenharmony_ci hdev->asic_type = get_asic_type(pdev->device); 2758c2ecf20Sopenharmony_ci if (hdev->asic_type == ASIC_INVALID) { 2768c2ecf20Sopenharmony_ci dev_err(&pdev->dev, "Unsupported ASIC\n"); 2778c2ecf20Sopenharmony_ci rc = -ENODEV; 2788c2ecf20Sopenharmony_ci goto free_hdev; 2798c2ecf20Sopenharmony_ci } 2808c2ecf20Sopenharmony_ci } else { 2818c2ecf20Sopenharmony_ci hdev->asic_type = asic_type; 2828c2ecf20Sopenharmony_ci } 2838c2ecf20Sopenharmony_ci 2848c2ecf20Sopenharmony_ci hdev->major = hl_major; 2858c2ecf20Sopenharmony_ci hdev->reset_on_lockup = reset_on_lockup; 2868c2ecf20Sopenharmony_ci hdev->pldm = 0; 2878c2ecf20Sopenharmony_ci 2888c2ecf20Sopenharmony_ci set_driver_behavior_per_device(hdev); 2898c2ecf20Sopenharmony_ci 2908c2ecf20Sopenharmony_ci if (timeout_locked) 2918c2ecf20Sopenharmony_ci hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000); 2928c2ecf20Sopenharmony_ci else 2938c2ecf20Sopenharmony_ci hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT; 2948c2ecf20Sopenharmony_ci 2958c2ecf20Sopenharmony_ci hdev->disabled = true; 2968c2ecf20Sopenharmony_ci hdev->pdev = pdev; /* can be NULL in case of simulator device */ 2978c2ecf20Sopenharmony_ci 2988c2ecf20Sopenharmony_ci /* Set default DMA mask to 32 bits */ 2998c2ecf20Sopenharmony_ci hdev->dma_mask = 32; 3008c2ecf20Sopenharmony_ci 3018c2ecf20Sopenharmony_ci mutex_lock(&hl_devs_idr_lock); 3028c2ecf20Sopenharmony_ci 3038c2ecf20Sopenharmony_ci /* Always save 2 numbers, 1 for main device and 1 for control. 3048c2ecf20Sopenharmony_ci * They must be consecutive 3058c2ecf20Sopenharmony_ci */ 3068c2ecf20Sopenharmony_ci main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, 3078c2ecf20Sopenharmony_ci GFP_KERNEL); 3088c2ecf20Sopenharmony_ci 3098c2ecf20Sopenharmony_ci if (main_id >= 0) 3108c2ecf20Sopenharmony_ci ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1, 3118c2ecf20Sopenharmony_ci main_id + 2, GFP_KERNEL); 3128c2ecf20Sopenharmony_ci 3138c2ecf20Sopenharmony_ci mutex_unlock(&hl_devs_idr_lock); 3148c2ecf20Sopenharmony_ci 3158c2ecf20Sopenharmony_ci if ((main_id < 0) || (ctrl_id < 0)) { 3168c2ecf20Sopenharmony_ci if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC)) 3178c2ecf20Sopenharmony_ci pr_err("too many devices in the system\n"); 3188c2ecf20Sopenharmony_ci 3198c2ecf20Sopenharmony_ci if (main_id >= 0) { 3208c2ecf20Sopenharmony_ci mutex_lock(&hl_devs_idr_lock); 3218c2ecf20Sopenharmony_ci idr_remove(&hl_devs_idr, main_id); 3228c2ecf20Sopenharmony_ci mutex_unlock(&hl_devs_idr_lock); 3238c2ecf20Sopenharmony_ci } 3248c2ecf20Sopenharmony_ci 3258c2ecf20Sopenharmony_ci rc = -EBUSY; 3268c2ecf20Sopenharmony_ci goto free_hdev; 3278c2ecf20Sopenharmony_ci } 3288c2ecf20Sopenharmony_ci 3298c2ecf20Sopenharmony_ci hdev->id = main_id; 3308c2ecf20Sopenharmony_ci hdev->id_control = ctrl_id; 3318c2ecf20Sopenharmony_ci 3328c2ecf20Sopenharmony_ci *dev = hdev; 3338c2ecf20Sopenharmony_ci 3348c2ecf20Sopenharmony_ci return 0; 3358c2ecf20Sopenharmony_ci 3368c2ecf20Sopenharmony_cifree_hdev: 3378c2ecf20Sopenharmony_ci kfree(hdev); 3388c2ecf20Sopenharmony_ci return rc; 3398c2ecf20Sopenharmony_ci} 3408c2ecf20Sopenharmony_ci 3418c2ecf20Sopenharmony_ci/* 3428c2ecf20Sopenharmony_ci * destroy_hdev - destroy habanalabs device instance 3438c2ecf20Sopenharmony_ci * 3448c2ecf20Sopenharmony_ci * @dev: pointer to the habanalabs device structure 3458c2ecf20Sopenharmony_ci * 3468c2ecf20Sopenharmony_ci */ 3478c2ecf20Sopenharmony_civoid destroy_hdev(struct hl_device *hdev) 3488c2ecf20Sopenharmony_ci{ 3498c2ecf20Sopenharmony_ci /* Remove device from the device list */ 3508c2ecf20Sopenharmony_ci mutex_lock(&hl_devs_idr_lock); 3518c2ecf20Sopenharmony_ci idr_remove(&hl_devs_idr, hdev->id); 3528c2ecf20Sopenharmony_ci idr_remove(&hl_devs_idr, hdev->id_control); 3538c2ecf20Sopenharmony_ci mutex_unlock(&hl_devs_idr_lock); 3548c2ecf20Sopenharmony_ci 3558c2ecf20Sopenharmony_ci kfree(hdev); 3568c2ecf20Sopenharmony_ci} 3578c2ecf20Sopenharmony_ci 3588c2ecf20Sopenharmony_cistatic int hl_pmops_suspend(struct device *dev) 3598c2ecf20Sopenharmony_ci{ 3608c2ecf20Sopenharmony_ci struct hl_device *hdev = dev_get_drvdata(dev); 3618c2ecf20Sopenharmony_ci 3628c2ecf20Sopenharmony_ci pr_debug("Going to suspend PCI device\n"); 3638c2ecf20Sopenharmony_ci 3648c2ecf20Sopenharmony_ci if (!hdev) { 3658c2ecf20Sopenharmony_ci pr_err("device pointer is NULL in suspend\n"); 3668c2ecf20Sopenharmony_ci return 0; 3678c2ecf20Sopenharmony_ci } 3688c2ecf20Sopenharmony_ci 3698c2ecf20Sopenharmony_ci return hl_device_suspend(hdev); 3708c2ecf20Sopenharmony_ci} 3718c2ecf20Sopenharmony_ci 3728c2ecf20Sopenharmony_cistatic int hl_pmops_resume(struct device *dev) 3738c2ecf20Sopenharmony_ci{ 3748c2ecf20Sopenharmony_ci struct hl_device *hdev = dev_get_drvdata(dev); 3758c2ecf20Sopenharmony_ci 3768c2ecf20Sopenharmony_ci pr_debug("Going to resume PCI device\n"); 3778c2ecf20Sopenharmony_ci 3788c2ecf20Sopenharmony_ci if (!hdev) { 3798c2ecf20Sopenharmony_ci pr_err("device pointer is NULL in resume\n"); 3808c2ecf20Sopenharmony_ci return 0; 3818c2ecf20Sopenharmony_ci } 3828c2ecf20Sopenharmony_ci 3838c2ecf20Sopenharmony_ci return hl_device_resume(hdev); 3848c2ecf20Sopenharmony_ci} 3858c2ecf20Sopenharmony_ci 3868c2ecf20Sopenharmony_ci/* 3878c2ecf20Sopenharmony_ci * hl_pci_probe - probe PCI habanalabs devices 3888c2ecf20Sopenharmony_ci * 3898c2ecf20Sopenharmony_ci * @pdev: pointer to pci device 3908c2ecf20Sopenharmony_ci * @id: pointer to pci device id structure 3918c2ecf20Sopenharmony_ci * 3928c2ecf20Sopenharmony_ci * Standard PCI probe function for habanalabs device. 3938c2ecf20Sopenharmony_ci * Create a new habanalabs device and initialize it according to the 3948c2ecf20Sopenharmony_ci * device's type 3958c2ecf20Sopenharmony_ci */ 3968c2ecf20Sopenharmony_cistatic int hl_pci_probe(struct pci_dev *pdev, 3978c2ecf20Sopenharmony_ci const struct pci_device_id *id) 3988c2ecf20Sopenharmony_ci{ 3998c2ecf20Sopenharmony_ci struct hl_device *hdev; 4008c2ecf20Sopenharmony_ci int rc; 4018c2ecf20Sopenharmony_ci 4028c2ecf20Sopenharmony_ci dev_info(&pdev->dev, HL_NAME 4038c2ecf20Sopenharmony_ci " device found [%04x:%04x] (rev %x)\n", 4048c2ecf20Sopenharmony_ci (int)pdev->vendor, (int)pdev->device, (int)pdev->revision); 4058c2ecf20Sopenharmony_ci 4068c2ecf20Sopenharmony_ci rc = create_hdev(&hdev, pdev, ASIC_INVALID, -1); 4078c2ecf20Sopenharmony_ci if (rc) 4088c2ecf20Sopenharmony_ci return rc; 4098c2ecf20Sopenharmony_ci 4108c2ecf20Sopenharmony_ci pci_set_drvdata(pdev, hdev); 4118c2ecf20Sopenharmony_ci 4128c2ecf20Sopenharmony_ci pci_enable_pcie_error_reporting(pdev); 4138c2ecf20Sopenharmony_ci 4148c2ecf20Sopenharmony_ci rc = hl_device_init(hdev, hl_class); 4158c2ecf20Sopenharmony_ci if (rc) { 4168c2ecf20Sopenharmony_ci dev_err(&pdev->dev, "Fatal error during habanalabs device init\n"); 4178c2ecf20Sopenharmony_ci rc = -ENODEV; 4188c2ecf20Sopenharmony_ci goto disable_device; 4198c2ecf20Sopenharmony_ci } 4208c2ecf20Sopenharmony_ci 4218c2ecf20Sopenharmony_ci return 0; 4228c2ecf20Sopenharmony_ci 4238c2ecf20Sopenharmony_cidisable_device: 4248c2ecf20Sopenharmony_ci pci_disable_pcie_error_reporting(pdev); 4258c2ecf20Sopenharmony_ci pci_set_drvdata(pdev, NULL); 4268c2ecf20Sopenharmony_ci destroy_hdev(hdev); 4278c2ecf20Sopenharmony_ci 4288c2ecf20Sopenharmony_ci return rc; 4298c2ecf20Sopenharmony_ci} 4308c2ecf20Sopenharmony_ci 4318c2ecf20Sopenharmony_ci/* 4328c2ecf20Sopenharmony_ci * hl_pci_remove - remove PCI habanalabs devices 4338c2ecf20Sopenharmony_ci * 4348c2ecf20Sopenharmony_ci * @pdev: pointer to pci device 4358c2ecf20Sopenharmony_ci * 4368c2ecf20Sopenharmony_ci * Standard PCI remove function for habanalabs device 4378c2ecf20Sopenharmony_ci */ 4388c2ecf20Sopenharmony_cistatic void hl_pci_remove(struct pci_dev *pdev) 4398c2ecf20Sopenharmony_ci{ 4408c2ecf20Sopenharmony_ci struct hl_device *hdev; 4418c2ecf20Sopenharmony_ci 4428c2ecf20Sopenharmony_ci hdev = pci_get_drvdata(pdev); 4438c2ecf20Sopenharmony_ci if (!hdev) 4448c2ecf20Sopenharmony_ci return; 4458c2ecf20Sopenharmony_ci 4468c2ecf20Sopenharmony_ci hl_device_fini(hdev); 4478c2ecf20Sopenharmony_ci pci_disable_pcie_error_reporting(pdev); 4488c2ecf20Sopenharmony_ci pci_set_drvdata(pdev, NULL); 4498c2ecf20Sopenharmony_ci destroy_hdev(hdev); 4508c2ecf20Sopenharmony_ci} 4518c2ecf20Sopenharmony_ci 4528c2ecf20Sopenharmony_ci/** 4538c2ecf20Sopenharmony_ci * hl_pci_err_detected - a PCI bus error detected on this device 4548c2ecf20Sopenharmony_ci * 4558c2ecf20Sopenharmony_ci * @pdev: pointer to pci device 4568c2ecf20Sopenharmony_ci * @state: PCI error type 4578c2ecf20Sopenharmony_ci * 4588c2ecf20Sopenharmony_ci * Called by the PCI subsystem whenever a non-correctable 4598c2ecf20Sopenharmony_ci * PCI bus error is detected 4608c2ecf20Sopenharmony_ci */ 4618c2ecf20Sopenharmony_cistatic pci_ers_result_t 4628c2ecf20Sopenharmony_cihl_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state) 4638c2ecf20Sopenharmony_ci{ 4648c2ecf20Sopenharmony_ci struct hl_device *hdev = pci_get_drvdata(pdev); 4658c2ecf20Sopenharmony_ci enum pci_ers_result result; 4668c2ecf20Sopenharmony_ci 4678c2ecf20Sopenharmony_ci switch (state) { 4688c2ecf20Sopenharmony_ci case pci_channel_io_normal: 4698c2ecf20Sopenharmony_ci return PCI_ERS_RESULT_CAN_RECOVER; 4708c2ecf20Sopenharmony_ci 4718c2ecf20Sopenharmony_ci case pci_channel_io_frozen: 4728c2ecf20Sopenharmony_ci dev_warn(hdev->dev, "frozen state error detected\n"); 4738c2ecf20Sopenharmony_ci result = PCI_ERS_RESULT_NEED_RESET; 4748c2ecf20Sopenharmony_ci break; 4758c2ecf20Sopenharmony_ci 4768c2ecf20Sopenharmony_ci case pci_channel_io_perm_failure: 4778c2ecf20Sopenharmony_ci dev_warn(hdev->dev, "failure state error detected\n"); 4788c2ecf20Sopenharmony_ci result = PCI_ERS_RESULT_DISCONNECT; 4798c2ecf20Sopenharmony_ci break; 4808c2ecf20Sopenharmony_ci 4818c2ecf20Sopenharmony_ci default: 4828c2ecf20Sopenharmony_ci result = PCI_ERS_RESULT_NONE; 4838c2ecf20Sopenharmony_ci } 4848c2ecf20Sopenharmony_ci 4858c2ecf20Sopenharmony_ci hdev->asic_funcs->halt_engines(hdev, true); 4868c2ecf20Sopenharmony_ci 4878c2ecf20Sopenharmony_ci return result; 4888c2ecf20Sopenharmony_ci} 4898c2ecf20Sopenharmony_ci 4908c2ecf20Sopenharmony_ci/** 4918c2ecf20Sopenharmony_ci * hl_pci_err_resume - resume after a PCI slot reset 4928c2ecf20Sopenharmony_ci * 4938c2ecf20Sopenharmony_ci * @pdev: pointer to pci device 4948c2ecf20Sopenharmony_ci * 4958c2ecf20Sopenharmony_ci */ 4968c2ecf20Sopenharmony_cistatic void hl_pci_err_resume(struct pci_dev *pdev) 4978c2ecf20Sopenharmony_ci{ 4988c2ecf20Sopenharmony_ci struct hl_device *hdev = pci_get_drvdata(pdev); 4998c2ecf20Sopenharmony_ci 5008c2ecf20Sopenharmony_ci dev_warn(hdev->dev, "Resuming device after PCI slot reset\n"); 5018c2ecf20Sopenharmony_ci hl_device_resume(hdev); 5028c2ecf20Sopenharmony_ci} 5038c2ecf20Sopenharmony_ci 5048c2ecf20Sopenharmony_ci/** 5058c2ecf20Sopenharmony_ci * hl_pci_err_slot_reset - a PCI slot reset has just happened 5068c2ecf20Sopenharmony_ci * 5078c2ecf20Sopenharmony_ci * @pdev: pointer to pci device 5088c2ecf20Sopenharmony_ci * 5098c2ecf20Sopenharmony_ci * Determine if the driver can recover from the PCI slot reset 5108c2ecf20Sopenharmony_ci */ 5118c2ecf20Sopenharmony_cistatic pci_ers_result_t hl_pci_err_slot_reset(struct pci_dev *pdev) 5128c2ecf20Sopenharmony_ci{ 5138c2ecf20Sopenharmony_ci return PCI_ERS_RESULT_RECOVERED; 5148c2ecf20Sopenharmony_ci} 5158c2ecf20Sopenharmony_ci 5168c2ecf20Sopenharmony_cistatic const struct dev_pm_ops hl_pm_ops = { 5178c2ecf20Sopenharmony_ci .suspend = hl_pmops_suspend, 5188c2ecf20Sopenharmony_ci .resume = hl_pmops_resume, 5198c2ecf20Sopenharmony_ci}; 5208c2ecf20Sopenharmony_ci 5218c2ecf20Sopenharmony_cistatic const struct pci_error_handlers hl_pci_err_handler = { 5228c2ecf20Sopenharmony_ci .error_detected = hl_pci_err_detected, 5238c2ecf20Sopenharmony_ci .slot_reset = hl_pci_err_slot_reset, 5248c2ecf20Sopenharmony_ci .resume = hl_pci_err_resume, 5258c2ecf20Sopenharmony_ci}; 5268c2ecf20Sopenharmony_ci 5278c2ecf20Sopenharmony_cistatic struct pci_driver hl_pci_driver = { 5288c2ecf20Sopenharmony_ci .name = HL_NAME, 5298c2ecf20Sopenharmony_ci .id_table = ids, 5308c2ecf20Sopenharmony_ci .probe = hl_pci_probe, 5318c2ecf20Sopenharmony_ci .remove = hl_pci_remove, 5328c2ecf20Sopenharmony_ci .shutdown = hl_pci_remove, 5338c2ecf20Sopenharmony_ci .driver.pm = &hl_pm_ops, 5348c2ecf20Sopenharmony_ci .err_handler = &hl_pci_err_handler, 5358c2ecf20Sopenharmony_ci}; 5368c2ecf20Sopenharmony_ci 5378c2ecf20Sopenharmony_ci/* 5388c2ecf20Sopenharmony_ci * hl_init - Initialize the habanalabs kernel driver 5398c2ecf20Sopenharmony_ci */ 5408c2ecf20Sopenharmony_cistatic int __init hl_init(void) 5418c2ecf20Sopenharmony_ci{ 5428c2ecf20Sopenharmony_ci int rc; 5438c2ecf20Sopenharmony_ci dev_t dev; 5448c2ecf20Sopenharmony_ci 5458c2ecf20Sopenharmony_ci pr_info("loading driver\n"); 5468c2ecf20Sopenharmony_ci 5478c2ecf20Sopenharmony_ci rc = alloc_chrdev_region(&dev, 0, HL_MAX_MINORS, HL_NAME); 5488c2ecf20Sopenharmony_ci if (rc < 0) { 5498c2ecf20Sopenharmony_ci pr_err("unable to get major\n"); 5508c2ecf20Sopenharmony_ci return rc; 5518c2ecf20Sopenharmony_ci } 5528c2ecf20Sopenharmony_ci 5538c2ecf20Sopenharmony_ci hl_major = MAJOR(dev); 5548c2ecf20Sopenharmony_ci 5558c2ecf20Sopenharmony_ci hl_class = class_create(THIS_MODULE, HL_NAME); 5568c2ecf20Sopenharmony_ci if (IS_ERR(hl_class)) { 5578c2ecf20Sopenharmony_ci pr_err("failed to allocate class\n"); 5588c2ecf20Sopenharmony_ci rc = PTR_ERR(hl_class); 5598c2ecf20Sopenharmony_ci goto remove_major; 5608c2ecf20Sopenharmony_ci } 5618c2ecf20Sopenharmony_ci 5628c2ecf20Sopenharmony_ci hl_debugfs_init(); 5638c2ecf20Sopenharmony_ci 5648c2ecf20Sopenharmony_ci rc = pci_register_driver(&hl_pci_driver); 5658c2ecf20Sopenharmony_ci if (rc) { 5668c2ecf20Sopenharmony_ci pr_err("failed to register pci device\n"); 5678c2ecf20Sopenharmony_ci goto remove_debugfs; 5688c2ecf20Sopenharmony_ci } 5698c2ecf20Sopenharmony_ci 5708c2ecf20Sopenharmony_ci pr_debug("driver loaded\n"); 5718c2ecf20Sopenharmony_ci 5728c2ecf20Sopenharmony_ci return 0; 5738c2ecf20Sopenharmony_ci 5748c2ecf20Sopenharmony_ciremove_debugfs: 5758c2ecf20Sopenharmony_ci hl_debugfs_fini(); 5768c2ecf20Sopenharmony_ci class_destroy(hl_class); 5778c2ecf20Sopenharmony_ciremove_major: 5788c2ecf20Sopenharmony_ci unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS); 5798c2ecf20Sopenharmony_ci return rc; 5808c2ecf20Sopenharmony_ci} 5818c2ecf20Sopenharmony_ci 5828c2ecf20Sopenharmony_ci/* 5838c2ecf20Sopenharmony_ci * hl_exit - Release all resources of the habanalabs kernel driver 5848c2ecf20Sopenharmony_ci */ 5858c2ecf20Sopenharmony_cistatic void __exit hl_exit(void) 5868c2ecf20Sopenharmony_ci{ 5878c2ecf20Sopenharmony_ci pci_unregister_driver(&hl_pci_driver); 5888c2ecf20Sopenharmony_ci 5898c2ecf20Sopenharmony_ci /* 5908c2ecf20Sopenharmony_ci * Removing debugfs must be after all devices or simulator devices 5918c2ecf20Sopenharmony_ci * have been removed because otherwise we get a bug in the 5928c2ecf20Sopenharmony_ci * debugfs module for referencing NULL objects 5938c2ecf20Sopenharmony_ci */ 5948c2ecf20Sopenharmony_ci hl_debugfs_fini(); 5958c2ecf20Sopenharmony_ci 5968c2ecf20Sopenharmony_ci class_destroy(hl_class); 5978c2ecf20Sopenharmony_ci unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS); 5988c2ecf20Sopenharmony_ci 5998c2ecf20Sopenharmony_ci idr_destroy(&hl_devs_idr); 6008c2ecf20Sopenharmony_ci 6018c2ecf20Sopenharmony_ci pr_debug("driver removed\n"); 6028c2ecf20Sopenharmony_ci} 6038c2ecf20Sopenharmony_ci 6048c2ecf20Sopenharmony_cimodule_init(hl_init); 6058c2ecf20Sopenharmony_cimodule_exit(hl_exit); 606