Lines Matching defs:hdev
19 bool hl_device_disabled_or_in_reset(struct hl_device *hdev)
21 if ((hdev->disabled) || (atomic_read(&hdev->in_reset)))
27 enum hl_device_status hl_device_status(struct hl_device *hdev)
31 if (hdev->disabled)
33 else if (atomic_read(&hdev->in_reset))
44 struct hl_device *hdev;
48 hdev = hpriv->hdev;
56 mutex_lock(&hdev->fpriv_list_lock);
58 hdev->compute_ctx = NULL;
59 mutex_unlock(&hdev->fpriv_list_lock);
86 hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
87 hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
99 struct hl_device *hdev;
103 hdev = hpriv->hdev;
105 mutex_lock(&hdev->fpriv_list_lock);
107 mutex_unlock(&hdev->fpriv_list_lock);
166 * @hdev: pointer to habanalabs device structure
176 static int device_init_cdev(struct hl_device *hdev, struct class *hclass,
189 (*dev)->devt = MKDEV(hdev->major, minor);
192 dev_set_drvdata(*dev, hdev);
198 static int device_cdev_sysfs_add(struct hl_device *hdev)
202 rc = cdev_device_add(&hdev->cdev, hdev->dev);
204 dev_err(hdev->dev,
209 rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
211 dev_err(hdev->dev,
217 rc = hl_sysfs_init(hdev);
219 dev_err(hdev->dev, "failed to initialize sysfs\n");
223 hdev->cdev_sysfs_created = true;
228 cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
230 cdev_device_del(&hdev->cdev, hdev->dev);
234 static void device_cdev_sysfs_del(struct hl_device *hdev)
236 if (!hdev->cdev_sysfs_created)
239 hl_sysfs_fini(hdev);
240 cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
241 cdev_device_del(&hdev->cdev, hdev->dev);
244 put_device(hdev->dev);
245 put_device(hdev->dev_ctrl);
251 * @hdev: pointer to habanalabs device structure
256 static int device_early_init(struct hl_device *hdev)
261 switch (hdev->asic_type) {
263 goya_set_asic_funcs(hdev);
264 strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
267 gaudi_set_asic_funcs(hdev);
268 sprintf(hdev->asic_name, "GAUDI");
271 dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
272 hdev->asic_type);
276 rc = hdev->asic_funcs->early_init(hdev);
280 rc = hl_asid_init(hdev);
284 if (hdev->asic_prop.completion_queues_count) {
285 hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
286 sizeof(*hdev->cq_wq),
288 if (!hdev->cq_wq) {
294 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
296 hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
297 if (hdev->cq_wq[i] == NULL) {
298 dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
304 hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
305 if (hdev->eq_wq == NULL) {
306 dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
311 hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
313 if (!hdev->hl_chip_info) {
318 hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE,
321 if (!hdev->idle_busy_ts_arr) {
326 rc = hl_mmu_if_set_funcs(hdev);
330 hl_cb_mgr_init(&hdev->kernel_cb_mgr);
332 mutex_init(&hdev->send_cpu_message_lock);
333 mutex_init(&hdev->debug_lock);
334 mutex_init(&hdev->mmu_cache_lock);
335 INIT_LIST_HEAD(&hdev->hw_queues_mirror_list);
336 spin_lock_init(&hdev->hw_queues_mirror_lock);
337 INIT_LIST_HEAD(&hdev->fpriv_list);
338 mutex_init(&hdev->fpriv_list_lock);
339 atomic_set(&hdev->in_reset, 0);
344 kfree(hdev->idle_busy_ts_arr);
346 kfree(hdev->hl_chip_info);
348 destroy_workqueue(hdev->eq_wq);
350 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
351 if (hdev->cq_wq[i])
352 destroy_workqueue(hdev->cq_wq[i]);
353 kfree(hdev->cq_wq);
355 hl_asid_fini(hdev);
357 if (hdev->asic_funcs->early_fini)
358 hdev->asic_funcs->early_fini(hdev);
366 * @hdev: pointer to habanalabs device structure
369 static void device_early_fini(struct hl_device *hdev)
373 mutex_destroy(&hdev->mmu_cache_lock);
374 mutex_destroy(&hdev->debug_lock);
375 mutex_destroy(&hdev->send_cpu_message_lock);
377 mutex_destroy(&hdev->fpriv_list_lock);
379 hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
381 kfree(hdev->idle_busy_ts_arr);
382 kfree(hdev->hl_chip_info);
384 destroy_workqueue(hdev->eq_wq);
386 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
387 destroy_workqueue(hdev->cq_wq[i]);
388 kfree(hdev->cq_wq);
390 hl_asid_fini(hdev);
392 if (hdev->asic_funcs->early_fini)
393 hdev->asic_funcs->early_fini(hdev);
398 struct hl_device *hdev = container_of(work, struct hl_device,
401 mutex_lock(&hdev->fpriv_list_lock);
403 if (!hdev->compute_ctx)
404 hl_device_set_frequency(hdev, PLL_LOW);
406 mutex_unlock(&hdev->fpriv_list_lock);
408 schedule_delayed_work(&hdev->work_freq,
414 struct hl_device *hdev = container_of(work, struct hl_device,
417 if (hl_device_disabled_or_in_reset(hdev))
420 if (!hdev->asic_funcs->send_heartbeat(hdev))
423 dev_err(hdev->dev, "Device heartbeat failed!\n");
424 hl_device_reset(hdev, true, false);
429 schedule_delayed_work(&hdev->work_heartbeat,
436 * @hdev: pointer to habanalabs device structure
441 static int device_late_init(struct hl_device *hdev)
445 if (hdev->asic_funcs->late_init) {
446 rc = hdev->asic_funcs->late_init(hdev);
448 dev_err(hdev->dev,
454 hdev->high_pll = hdev->asic_prop.high_pll;
457 hdev->curr_pll_profile = PLL_LOW;
459 if (hdev->pm_mng_profile == PM_AUTO)
460 hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW);
462 hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);
464 INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
465 schedule_delayed_work(&hdev->work_freq,
468 if (hdev->heartbeat) {
469 INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
470 schedule_delayed_work(&hdev->work_heartbeat,
474 hdev->late_init_done = true;
482 * @hdev: pointer to habanalabs device structure
485 static void device_late_fini(struct hl_device *hdev)
487 if (!hdev->late_init_done)
490 cancel_delayed_work_sync(&hdev->work_freq);
491 if (hdev->heartbeat)
492 cancel_delayed_work_sync(&hdev->work_heartbeat);
494 if (hdev->asic_funcs->late_fini)
495 hdev->asic_funcs->late_fini(hdev);
497 hdev->late_init_done = false;
500 uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms)
504 u32 overlap_cnt = 0, last_index = hdev->idle_busy_ts_idx;
510 ts = &hdev->idle_busy_ts_arr[last_index];
521 ts = &hdev->idle_busy_ts_arr[last_index];
585 ts = &hdev->idle_busy_ts_arr[last_index];
599 * @hdev: pointer to habanalabs device structure
609 int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
611 if ((hdev->pm_mng_profile == PM_MANUAL) ||
612 (hdev->curr_pll_profile == freq))
615 dev_dbg(hdev->dev, "Changing device frequency to %s\n",
618 hdev->asic_funcs->set_pll_profile(hdev, freq);
620 hdev->curr_pll_profile = freq;
625 int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
629 mutex_lock(&hdev->debug_lock);
632 if (!hdev->in_debug) {
633 dev_err(hdev->dev,
639 if (!hdev->hard_reset_pending)
640 hdev->asic_funcs->halt_coresight(hdev);
642 hdev->in_debug = 0;
644 if (!hdev->hard_reset_pending)
645 hdev->asic_funcs->set_clock_gating(hdev);
650 if (hdev->in_debug) {
651 dev_err(hdev->dev,
657 hdev->asic_funcs->disable_clock_gating(hdev);
658 hdev->in_debug = 1;
661 mutex_unlock(&hdev->debug_lock);
669 * @hdev: pointer to habanalabs device structure
675 int hl_device_suspend(struct hl_device *hdev)
679 pci_save_state(hdev->pdev);
682 rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
684 dev_err(hdev->dev, "Can't suspend while in reset\n");
689 hdev->disabled = true;
695 hdev->asic_funcs->hw_queues_lock(hdev);
696 hdev->asic_funcs->hw_queues_unlock(hdev);
699 mutex_lock(&hdev->send_cpu_message_lock);
700 mutex_unlock(&hdev->send_cpu_message_lock);
702 rc = hdev->asic_funcs->suspend(hdev);
704 dev_err(hdev->dev,
708 pci_disable_device(hdev->pdev);
709 pci_set_power_state(hdev->pdev, PCI_D3hot);
717 * @hdev: pointer to habanalabs device structure
723 int hl_device_resume(struct hl_device *hdev)
727 pci_set_power_state(hdev->pdev, PCI_D0);
728 pci_restore_state(hdev->pdev);
729 rc = pci_enable_device_mem(hdev->pdev);
731 dev_err(hdev->dev,
736 pci_set_master(hdev->pdev);
738 rc = hdev->asic_funcs->resume(hdev);
740 dev_err(hdev->dev, "Failed to resume device after suspend\n");
745 hdev->disabled = false;
746 atomic_set(&hdev->in_reset, 0);
748 rc = hl_device_reset(hdev, true, false);
750 dev_err(hdev->dev, "Failed to reset device during resume\n");
757 pci_clear_master(hdev->pdev);
758 pci_disable_device(hdev->pdev);
763 static int device_kill_open_processes(struct hl_device *hdev)
769 if (hdev->pldm)
777 if (!list_empty(&hdev->fpriv_list))
780 mutex_lock(&hdev->fpriv_list_lock);
785 list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) {
788 dev_info(hdev->dev, "Killing user process pid=%d\n",
797 mutex_unlock(&hdev->fpriv_list_lock);
807 while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) {
808 dev_info(hdev->dev,
816 return list_empty(&hdev->fpriv_list) ? 0 : -EBUSY;
823 struct hl_device *hdev = device_reset_work->hdev;
825 hl_device_reset(hdev, true, true);
833 * @hdev: pointer to habanalabs device structure
848 int hl_device_reset(struct hl_device *hdev, bool hard_reset,
853 if (!hdev->init_done) {
854 dev_err(hdev->dev,
859 if ((!hard_reset) && (!hdev->supports_soft_reset)) {
860 dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n");
871 rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
885 if (hl_fw_send_pci_access_msg(hdev,
887 dev_warn(hdev->dev,
892 hdev->disabled = true;
897 hdev->asic_funcs->hw_queues_lock(hdev);
898 hdev->asic_funcs->hw_queues_unlock(hdev);
901 mutex_lock(&hdev->fpriv_list_lock);
902 mutex_unlock(&hdev->fpriv_list_lock);
904 dev_err(hdev->dev, "Going to RESET device!\n");
911 hdev->hard_reset_pending = true;
927 device_reset_work->hdev = hdev;
934 device_late_fini(hdev);
940 mutex_lock(&hdev->send_cpu_message_lock);
941 mutex_unlock(&hdev->send_cpu_message_lock);
949 hdev->asic_funcs->halt_engines(hdev, hard_reset);
952 hl_cs_rollback_all(hdev);
959 rc = device_kill_open_processes(hdev);
961 dev_crit(hdev->dev,
969 flush_workqueue(hdev->eq_wq);
973 hdev->asic_funcs->hw_fini(hdev, hard_reset);
977 if (hl_ctx_put(hdev->kernel_ctx) == 1)
978 hdev->kernel_ctx = NULL;
979 hl_vm_fini(hdev);
980 hl_mmu_fini(hdev);
981 hl_eq_reset(hdev, &hdev->event_queue);
985 hl_hw_queue_reset(hdev, hard_reset);
986 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
987 hl_cq_reset(hdev, &hdev->completion_queue[i]);
989 hdev->idle_busy_ts_idx = 0;
990 hdev->idle_busy_ts_arr[0].busy_to_idle_ts = ktime_set(0, 0);
991 hdev->idle_busy_ts_arr[0].idle_to_busy_ts = ktime_set(0, 0);
993 if (hdev->cs_active_cnt)
994 dev_crit(hdev->dev, "CS active cnt %d is not 0 during reset\n",
995 hdev->cs_active_cnt);
997 mutex_lock(&hdev->fpriv_list_lock);
1000 if (hdev->compute_ctx) {
1001 atomic_set(&hdev->compute_ctx->thread_ctx_switch_token, 1);
1002 hdev->compute_ctx->thread_ctx_switch_wait_token = 0;
1005 mutex_unlock(&hdev->fpriv_list_lock);
1010 hdev->device_cpu_disabled = false;
1011 hdev->hard_reset_pending = false;
1013 if (hdev->kernel_ctx) {
1014 dev_crit(hdev->dev,
1020 rc = hl_mmu_init(hdev);
1022 dev_err(hdev->dev,
1028 hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
1030 if (!hdev->kernel_ctx) {
1032 hl_mmu_fini(hdev);
1036 hdev->compute_ctx = NULL;
1038 rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
1040 dev_err(hdev->dev,
1042 kfree(hdev->kernel_ctx);
1043 hdev->kernel_ctx = NULL;
1044 hl_mmu_fini(hdev);
1053 hdev->disabled = false;
1055 rc = hdev->asic_funcs->hw_init(hdev);
1057 dev_err(hdev->dev,
1063 rc = hdev->asic_funcs->test_queues(hdev);
1065 dev_err(hdev->dev,
1071 rc = device_late_init(hdev);
1073 dev_err(hdev->dev,
1078 rc = hl_vm_init(hdev);
1080 dev_err(hdev->dev,
1085 hl_set_max_power(hdev);
1087 rc = hdev->asic_funcs->soft_reset_late_init(hdev);
1089 dev_err(hdev->dev,
1095 atomic_set(&hdev->in_reset, 0);
1098 hdev->hard_reset_cnt++;
1100 hdev->soft_reset_cnt++;
1102 dev_warn(hdev->dev, "Successfully finished resetting the device\n");
1107 hdev->disabled = true;
1110 dev_err(hdev->dev,
1112 hdev->hard_reset_cnt++;
1114 dev_err(hdev->dev,
1116 hdev->soft_reset_cnt++;
1121 atomic_set(&hdev->in_reset, 0);
1129 * @hdev: pointer to habanalabs device structure
1135 int hl_device_init(struct hl_device *hdev, struct class *hclass)
1141 name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2);
1148 rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name,
1149 &hdev->cdev, &hdev->dev);
1156 name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2);
1163 rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
1164 name, &hdev->cdev_ctrl, &hdev->dev_ctrl);
1172 rc = device_early_init(hdev);
1180 rc = hdev->asic_funcs->sw_init(hdev);
1189 rc = hl_hw_queues_create(hdev);
1191 dev_err(hdev->dev, "failed to initialize kernel queues\n");
1195 cq_cnt = hdev->asic_prop.completion_queues_count;
1203 hdev->completion_queue = kcalloc(cq_cnt,
1204 sizeof(*hdev->completion_queue),
1207 if (!hdev->completion_queue) {
1208 dev_err(hdev->dev,
1216 rc = hl_cq_init(hdev, &hdev->completion_queue[i],
1217 hdev->asic_funcs->get_queue_id_for_cq(hdev, i));
1219 dev_err(hdev->dev,
1223 hdev->completion_queue[i].cq_idx = i;
1231 rc = hl_eq_init(hdev, &hdev->event_queue);
1233 dev_err(hdev->dev, "failed to initialize event queue\n");
1238 rc = hl_mmu_init(hdev);
1240 dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n");
1245 hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
1246 if (!hdev->kernel_ctx) {
1251 hdev->compute_ctx = NULL;
1253 rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
1255 dev_err(hdev->dev, "failed to initialize kernel context\n");
1256 kfree(hdev->kernel_ctx);
1260 rc = hl_cb_pool_init(hdev);
1262 dev_err(hdev->dev, "failed to initialize CB pool\n");
1266 hl_debugfs_add_device(hdev);
1268 if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
1269 dev_info(hdev->dev,
1271 hdev->asic_funcs->halt_engines(hdev, true);
1272 hdev->asic_funcs->hw_fini(hdev, true);
1285 hdev->disabled = false;
1287 rc = hdev->asic_funcs->hw_init(hdev);
1289 dev_err(hdev->dev, "failed to initialize the H/W\n");
1295 rc = hdev->asic_funcs->test_queues(hdev);
1297 dev_err(hdev->dev, "Failed to detect if device is alive\n");
1302 rc = device_late_init(hdev);
1304 dev_err(hdev->dev, "Failed late initialization\n");
1309 dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n",
1310 hdev->asic_name,
1311 hdev->asic_prop.dram_size / 1024 / 1024 / 1024);
1313 rc = hl_vm_init(hdev);
1315 dev_err(hdev->dev, "Failed to initialize memory module\n");
1326 rc = device_cdev_sysfs_add(hdev);
1328 dev_err(hdev->dev,
1337 hl_set_max_power(hdev);
1345 rc = hl_hwmon_init(hdev);
1347 dev_err(hdev->dev, "Failed to initialize hwmon\n");
1352 dev_notice(hdev->dev,
1355 hdev->init_done = true;
1360 if (hl_ctx_put(hdev->kernel_ctx) != 1)
1361 dev_err(hdev->dev,
1364 hl_mmu_fini(hdev);
1366 hl_eq_fini(hdev, &hdev->event_queue);
1369 hl_cq_fini(hdev, &hdev->completion_queue[i]);
1370 kfree(hdev->completion_queue);
1372 hl_hw_queues_destroy(hdev);
1374 hdev->asic_funcs->sw_fini(hdev);
1376 device_early_fini(hdev);
1378 put_device(hdev->dev_ctrl);
1380 put_device(hdev->dev);
1382 hdev->disabled = true;
1384 device_cdev_sysfs_add(hdev);
1385 if (hdev->pdev)
1386 dev_err(&hdev->pdev->dev,
1388 hdev->id / 2);
1391 hdev->id / 2);
1399 * @hdev: pointer to habanalabs device structure
1403 void hl_device_fini(struct hl_device *hdev)
1408 dev_info(hdev->dev, "Removing device\n");
1420 rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
1423 rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
1437 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1440 hdev->disabled = true;
1445 hdev->asic_funcs->hw_queues_lock(hdev);
1446 hdev->asic_funcs->hw_queues_unlock(hdev);
1449 mutex_lock(&hdev->fpriv_list_lock);
1450 mutex_unlock(&hdev->fpriv_list_lock);
1452 hdev->hard_reset_pending = true;
1454 hl_hwmon_fini(hdev);
1456 device_late_fini(hdev);
1458 hl_debugfs_remove_device(hdev);
1465 hdev->asic_funcs->halt_engines(hdev, true);
1468 hl_cs_rollback_all(hdev);
1474 rc = device_kill_open_processes(hdev);
1476 dev_crit(hdev->dev, "Failed to kill all open processes\n");
1478 hl_cb_pool_fini(hdev);
1481 hdev->asic_funcs->hw_fini(hdev, true);
1484 if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
1485 dev_err(hdev->dev, "kernel ctx is still alive\n");
1487 hl_vm_fini(hdev);
1489 hl_mmu_fini(hdev);
1491 hl_eq_fini(hdev, &hdev->event_queue);
1493 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1494 hl_cq_fini(hdev, &hdev->completion_queue[i]);
1495 kfree(hdev->completion_queue);
1497 hl_hw_queues_destroy(hdev);
1500 hdev->asic_funcs->sw_fini(hdev);
1502 device_early_fini(hdev);
1505 device_cdev_sysfs_del(hdev);
1517 * @hdev: pointer to habanalabs device structure
1523 inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
1525 return readl(hdev->rmmio + reg);
1531 * @hdev: pointer to habanalabs device structure
1538 inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
1540 writel(val, hdev->rmmio + reg);