Lines Matching defs:hdev

35 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq,
39 static void hl_push_cs_outcome(struct hl_device *hdev,
72 dev_dbg(hdev->dev, "CS %llu outcome was lost\n", node->seq);
121 struct hl_device *hdev = hw_sob->hdev;
123 dev_dbg(hdev->dev, "reset sob id %u\n", hw_sob->sob_id);
125 hdev->asic_funcs->reset_sob(hdev, hw_sob);
134 struct hl_device *hdev = hw_sob->hdev;
136 dev_crit(hdev->dev,
281 static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
300 struct hl_device *hdev = hpriv->hdev;
318 rc = hdev->asic_funcs->cs_parser(hdev, &parser);
320 if (is_cb_patched(hdev, job)) {
343 static void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job)
347 if (is_cb_patched(hdev, job)) {
348 hl_userptr_delete_list(hdev, &job->userptr_list);
379 hl_debugfs_remove_job(hdev, job);
399 if (hdev->asic_prop.completion_mode == HL_COMPLETION_MODE_JOB)
411 * @hdev: pointer to device structure
414 * @note: This function must be called under 'hdev->cs_mirror_lock'
418 struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq)
422 list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node)
433 * @hdev: pointer to device structure
437 bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs)
453 * @hdev: pointer to device structure
460 static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs)
474 * @hdev: pointer to device structure
479 static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
488 static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
495 spin_lock(&hdev->cs_mirror_lock);
506 first_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
511 spin_unlock(&hdev->cs_mirror_lock);
516 if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT)
522 spin_lock(&hdev->cs_mirror_lock);
525 list_for_each_entry(iter, &hdev->cs_mirror_list, mirror_node)
536 spin_unlock(&hdev->cs_mirror_lock);
542 * @hdev: pointer to habanalabs device structure
544 static void force_complete_multi_cs(struct hl_device *hdev)
551 mcs_compl = &hdev->multi_cs_completion[i];
565 dev_err(hdev->dev,
576 * @hdev: pointer to habanalabs device structure
588 static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
600 mcs_compl = &hdev->multi_cs_completion[i];
637 static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
659 dev_dbg(hdev->dev,
669 hdev->asic_funcs->reset_sob_group(hdev,
679 struct hl_device *hdev = cs->ctx->hdev;
695 hl_complete_job(hdev, job);
714 spin_lock(&hdev->cs_mirror_lock);
716 spin_unlock(&hdev->cs_mirror_lock);
718 cs_handle_tdr(hdev, cs);
729 staged_cs_put(hdev, staged_cs);
737 spin_lock(&hdev->cs_mirror_lock);
739 spin_unlock(&hdev->cs_mirror_lock);
759 hdev->shadow_cs_queue[cs->sequence & (hdev->asic_prop.max_pending_cs - 1)] = NULL;
773 dev_err(hdev->dev,
781 hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence,
788 complete_multi_cs(hdev, cs);
790 cs_release_sob_reset_handler(hdev, cs, hl_cs_cmpl);
802 struct hl_device *hdev;
818 hdev = cs->ctx->hdev;
821 if (hdev->reset_on_lockup)
824 hdev->reset_info.needs_reset = true;
831 rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0);
833 hdev->captured_err_info.cs_timeout.timestamp = ktime_get();
834 hdev->captured_err_info.cs_timeout.seq = cs->sequence;
838 timeout_sec = jiffies_to_msecs(hdev->timeout_jiffies) / 1000;
842 dev_err(hdev->dev,
848 dev_err(hdev->dev,
854 dev_err(hdev->dev,
860 dev_err(hdev->dev,
866 rc = hl_state_dump(hdev);
868 dev_err(hdev->dev, "Error during system state dump %d\n", rc);
874 hl_device_cond_reset(hdev, HL_DRV_RESET_TDR, event_mask);
876 hl_notifier_event_send_all(hdev, event_mask);
880 static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
890 cntr = &hdev->aggregated_cs_counters;
913 hdev->reset_info.skip_reset_on_timeout ||
932 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
935 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
945 cs_cmpl->hdev = hdev;
954 (hdev->asic_prop.max_pending_cs - 1)];
965 dev_crit_ratelimited(hdev->dev,
969 dev_dbg_ratelimited(hdev->dev,
983 (hdev->asic_prop.max_pending_cs - 1)] =
1008 static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
1012 staged_cs_put(hdev, cs);
1015 hl_complete_job(hdev, job);
1020 * @hdev: pointer to habanalabs device structure
1027 static void release_reserved_encaps_signals(struct hl_device *hdev)
1029 struct hl_ctx *ctx = hl_get_compute_ctx(hdev);
1046 void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush)
1052 flush_workqueue(hdev->ts_free_obj_wq);
1057 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1058 flush_workqueue(hdev->cq_wq[i]);
1060 flush_workqueue(hdev->cs_cmplt_wq);
1064 list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
1067 dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
1069 cs_rollback(hdev, cs);
1073 force_complete_multi_cs(hdev);
1075 release_reserved_encaps_signals(hdev);
1097 void hl_release_pending_user_interrupts(struct hl_device *hdev)
1099 struct asic_fixed_properties *prop = &hdev->asic_prop;
1114 interrupt = &hdev->user_interrupt[i];
1118 interrupt = &hdev->common_user_cq_interrupt;
1121 interrupt = &hdev->common_decoder_interrupt;
1125 static void force_complete_cs(struct hl_device *hdev)
1129 spin_lock(&hdev->cs_mirror_lock);
1131 list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node) {
1136 spin_unlock(&hdev->cs_mirror_lock);
1139 void hl_abort_waiting_for_cs_completions(struct hl_device *hdev)
1141 force_complete_cs(hdev);
1142 force_complete_multi_cs(hdev);
1150 struct hl_device *hdev = cs->ctx->hdev;
1153 hl_complete_job(hdev, job);
1159 struct hl_device *hdev = cs->ctx->hdev;
1163 hl_complete_job(hdev, job);
1166 u32 hl_get_active_cs_num(struct hl_device *hdev)
1171 spin_lock(&hdev->cs_mirror_lock);
1173 list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node)
1177 spin_unlock(&hdev->cs_mirror_lock);
1182 static int validate_queue_index(struct hl_device *hdev,
1187 struct asic_fixed_properties *asic = &hdev->asic_prop;
1194 dev_err(hdev->dev, "Queue index %d is invalid\n",
1202 dev_err(hdev->dev, "Queue index %d is not applicable\n",
1208 dev_err(hdev->dev, "Queue index %d is binned out\n",
1214 dev_err(hdev->dev,
1226 dev_err(hdev->dev,
1236 dev_err(hdev->dev,
1253 static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
1261 dev_err(hdev->dev, "CB handle 0x%llx invalid\n", chunk->cb_handle);
1266 dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
1279 struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
1295 if (is_cb_patched(hdev, job))
1328 struct hl_device *hdev = hpriv->hdev;
1338 dev_dbg(hdev->dev, "Padding bytes must be 0\n");
1342 if (!hl_device_operational(hdev, &status)) {
1347 !hdev->supports_staged_submission) {
1348 dev_err(hdev->dev, "staged submission not supported");
1355 dev_err(hdev->dev,
1367 if (unlikely(is_sync_stream && !hdev->supports_sync_stream)) {
1368 dev_err(hdev->dev, "Sync stream CS is not supported\n");
1374 dev_err(hdev->dev, "Got execute CS with 0 chunks, context %d\n", ctx->asid);
1378 dev_err(hdev->dev,
1387 static int hl_cs_copy_chunk_array(struct hl_device *hdev,
1396 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1397 dev_err(hdev->dev,
1410 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1417 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1418 dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
1426 static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
1451 staged_cs_get(hdev, cs);
1458 static u32 get_stream_master_qid_mask(struct hl_device *hdev, u32 qid)
1462 for (i = 0; i < hdev->stream_master_qid_arr_size; i++)
1463 if (qid == hdev->stream_master_qid_arr[i])
1475 struct hl_device *hdev = hpriv->hdev;
1486 cntr = &hdev->aggregated_cs_counters;
1490 rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
1501 rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT,
1511 rc = cs_staged_submission(hdev, cs, user_sequence, flags,
1528 rc = validate_queue_index(hdev, chunk, &queue_type,
1537 cb = get_cb_from_cs_chunk(hdev, &hpriv->mem_mgr, chunk);
1557 if (hdev->supports_wait_for_multi_cs)
1559 get_stream_master_qid_mask(hdev,
1566 job = hl_cs_allocate_job(hdev, queue_type,
1571 dev_err(hdev->dev, "Failed to allocate a new job\n");
1601 hl_debugfs_add_job(hdev, job);
1607 dev_err(hdev->dev,
1620 dev_err(hdev->dev,
1634 if (hdev->supports_wait_for_multi_cs)
1640 dev_err(hdev->dev,
1655 cs_rollback(hdev, cs);
1670 struct hl_device *hdev = hpriv->hdev;
1679 if (hdev->supports_ctx_switch)
1686 rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
1688 dev_err_ratelimited(hdev->dev,
1707 hdev->asic_funcs->restore_phase_topology(hdev);
1713 dev_dbg(hdev->dev,
1718 cs_seq, 0, 0, hdev->timeout_jiffies, &sob_count);
1724 dev_err(hdev->dev,
1734 ret = _hl_cs_wait_ioctl(hdev, ctx,
1735 jiffies_to_usecs(hdev->timeout_jiffies),
1743 dev_err(hdev->dev,
1751 if (hdev->supports_ctx_switch)
1754 } else if (hdev->supports_ctx_switch && !ctx->thread_ctx_switch_wait_token) {
1755 rc = hl_poll_timeout_memory(hdev,
1757 100, jiffies_to_usecs(hdev->timeout_jiffies), false);
1760 dev_err(hdev->dev,
1768 hl_device_reset(hdev, 0);
1777 * @hdev: pointer to device structure
1785 int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
1793 prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
1815 dev_err(hdev->dev, "error: Cannot switch SOBs q_idx: %d\n",
1856 dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
1865 static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
1883 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1884 dev_err(hdev->dev,
1898 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
1907 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
1908 dev_err(hdev->dev,
1923 static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
1932 cntr = &hdev->aggregated_cs_counters;
1934 job = hl_cs_allocate_job(hdev, q_type, true);
1938 dev_err(hdev->dev, "Failed to allocate a new job\n");
1943 cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
1945 cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
1947 cb = hl_cb_kernel_create(hdev, cb_size, q_type == QUEUE_TYPE_HW);
1973 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1983 hl_debugfs_add_job(hdev, job);
1995 struct hl_device *hdev = hpriv->hdev;
2003 dev_err(hdev->dev, "signals count(%u) exceeds the max SOB value\n",
2009 if (q_idx >= hdev->asic_prop.max_queues) {
2010 dev_err(hdev->dev, "Queue index %d is invalid\n",
2016 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
2019 dev_err(hdev->dev,
2026 prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
2045 dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n");
2052 handle->hdev = hdev;
2055 hdev->asic_funcs->hw_queues_lock(hdev);
2065 rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, count,
2068 dev_err(hdev->dev, "Failed to switch SOB\n");
2069 hdev->asic_funcs->hw_queues_unlock(hdev);
2086 hdev->asic_funcs->hw_queues_unlock(hdev);
2091 dev_dbg(hdev->dev,
2114 struct hl_device *hdev = hpriv->hdev;
2125 dev_dbg(hdev->dev, "unreserve signals, handle: %u, SOB:0x%x, count: %u\n",
2129 hdev->asic_funcs->hw_queues_lock(hdev);
2132 prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
2134 sob_addr = hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id);
2144 dev_err(hdev->dev, "Cannot unreserve signals, SOB val ran out of sync, expected: %u, actual val: %u\n",
2148 hdev->asic_funcs->hw_queues_unlock(hdev);
2159 hdev->asic_funcs->hw_queues_unlock(hdev);
2173 dev_err(hdev->dev, "failed to unreserve signals, cannot find handler\n");
2195 struct hl_device *hdev = hpriv->hdev;
2206 cntr = &hdev->aggregated_cs_counters;
2209 rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
2217 if (chunk->queue_index >= hdev->asic_prop.max_queues) {
2220 dev_err(hdev->dev, "Queue index %d is invalid\n",
2227 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
2233 dev_err(hdev->dev,
2244 dev_err(hdev->dev,
2250 if (!hdev->nic_ports_mask) {
2253 dev_err(hdev->dev,
2268 rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq,
2301 dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n",
2310 dev_err(hdev->dev, "offset(%u) value exceed max reserved signals count(%u)!\n",
2322 dev_err(hdev->dev,
2346 dev_err(hdev->dev,
2362 rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout);
2391 rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type,
2394 rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
2418 dev_err(hdev->dev,
2433 cs_rollback(hdev, cs);
2450 struct hl_device *hdev = hpriv->hdev;
2455 if (!hdev->asic_prop.supports_engine_modes)
2458 if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) {
2459 dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores);
2464 dev_err(hdev->dev, "Engine core command is invalid\n");
2474 dev_err(hdev->dev, "Failed to copy core-ids array from user\n");
2479 rc = hdev->asic_funcs->set_engine_cores(hdev, cores, num_engine_cores, core_command);
2488 struct hl_device *hdev = hpriv->hdev;
2493 if (!hdev->asic_prop.supports_engine_modes)
2497 dev_err(hdev->dev, "Engine command is invalid\n");
2501 max_num_of_engines = hdev->asic_prop.max_num_of_engines;
2503 max_num_of_engines = hdev->asic_prop.num_engine_cores;
2506 dev_err(hdev->dev, "Number of engines %d is invalid\n", num_engines);
2516 dev_err(hdev->dev, "Failed to copy engine-ids array from user\n");
2521 rc = hdev->asic_funcs->set_engines(hdev, engines, num_engines, command);
2529 struct hl_device *hdev = hpriv->hdev;
2530 struct asic_fixed_properties *prop = &hdev->asic_prop;
2533 dev_dbg(hdev->dev, "HBW flush is not supported\n");
2574 : hpriv->hdev->timeout_jiffies;
2645 struct hl_device *hdev = ctx->hdev;
2653 dev_notice_ratelimited(hdev->dev,
2661 dev_dbg(hdev->dev,
2722 struct hl_device *hdev = mcs_data->ctx->hdev;
2775 dev_err(hdev->dev,
2827 dev_err(hdev->dev, "Invalid fence status\n");
2843 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq,
2881 * @hdev: pointer to habanalabs device structure
2891 static struct multi_cs_completion *hl_wait_multi_cs_completion_init(struct hl_device *hdev)
2898 mcs_compl = &hdev->multi_cs_completion[i];
2915 dev_err(hdev->dev, "no available multi-CS completion structure\n");
2969 * @hdev: pointer to habanalabs device structure
2971 void hl_multi_cs_completion_init(struct hl_device *hdev)
2977 mcs_cmpl = &hdev->multi_cs_completion[i];
2994 struct hl_device *hdev = hpriv->hdev;
3007 dev_dbg(hdev->dev, "Padding bytes must be 0\n");
3011 if (!hdev->supports_wait_for_multi_cs) {
3012 dev_err(hdev->dev, "Wait for multi CS is not supported\n");
3019 dev_err(hdev->dev, "Can wait only up to %d CSs, input sequence is of length %u\n",
3034 dev_err(hdev->dev, "Failed to copy multi-cs sequence array from user\n");
3056 mcs_compl = hl_wait_multi_cs_completion_init(hdev);
3113 dev_err_ratelimited(hdev->dev,
3146 struct hl_device *hdev = hpriv->hdev;
3153 rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq, &status, &timestamp);
3156 dev_err_ratelimited(hdev->dev,
3166 dev_err_ratelimited(hdev->dev,
3171 dev_err_ratelimited(hdev->dev,
3281 static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
3313 dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n",
3397 dev_err_ratelimited(hdev->dev,
3404 dev_err_ratelimited(hdev->dev,
3450 static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ctx *ctx,
3485 dev_err(hdev->dev, "Failed to copy completion value from user\n");
3520 dev_err(hdev->dev, "Failed to copy completion value from user\n");
3529 dev_err_ratelimited(hdev->dev,
3539 dev_err_ratelimited(hdev->dev,
3569 struct hl_device *hdev = hpriv->hdev;
3577 prop = &hdev->asic_prop;
3580 dev_err(hdev->dev, "no user interrupts allowed");
3593 dev_err(hdev->dev, "interrupt on a disabled core(%u) not allowed",
3598 interrupt = &hdev->user_interrupt[interrupt_id];
3603 interrupt = &hdev->user_interrupt[int_idx];
3606 interrupt = &hdev->common_user_cq_interrupt;
3608 interrupt = &hdev->common_decoder_interrupt;
3610 dev_err(hdev->dev, "invalid user interrupt %u", interrupt_id);
3615 rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->mem_mgr, &hpriv->mem_mgr,
3623 rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
3643 struct hl_device *hdev = hpriv->hdev;
3651 if (!hl_device_operational(hpriv->hdev, NULL) || hdev->reset_info.watchdog_active)
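A recurring pattern in the matches above (e.g. lines 1129-1136 and 1166-1177) is that hdev->cs_mirror_list is only walked while hdev->cs_mirror_lock is held. A minimal sketch of that pattern, assuming the struct hl_device / struct hl_cs definitions from the driver's headers; the !cs->completed check and the function name are illustrative guesses at the elided loop body, not verbatim driver code:

	/* Sketch: count in-flight command submissions under cs_mirror_lock. */
	static u32 count_active_cs_sketch(struct hl_device *hdev)
	{
		struct hl_cs *cs;
		u32 active = 0;

		spin_lock(&hdev->cs_mirror_lock);

		list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node)
			if (!cs->completed)	/* assumed field; stands in for the real per-CS check */
				active++;

		spin_unlock(&hdev->cs_mirror_lock);

		return active;
	}

The same lock/iterate/unlock shape also appears in the TDR handling (lines 495-536) and in force_complete_cs (lines 1129-1136), which is why so many of the matched lines are spin_lock/spin_unlock calls on cs_mirror_lock.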