// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "habanalabs.h"

#include <linux/slab.h>

/*
 * hl_hw_queue_add_ptr - add to pi or ci and check if it wraps around
 *
 * @ptr: the current pi/ci value
 * @val: the amount to add
 *
 * Add val to ptr. It can go until twice the queue length.
 */
inline u32 hl_hw_queue_add_ptr(u32 ptr, u16 val)
{
	ptr += val;
	ptr &= ((HL_QUEUE_LENGTH << 1) - 1);
	return ptr;
}

static inline int queue_ci_get(atomic_t *ci, u32 queue_len)
{
	return atomic_read(ci) & ((queue_len << 1) - 1);
}

static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
{
	int delta = (q->pi - queue_ci_get(&q->ci, queue_len));

	if (delta >= 0)
		return (queue_len - delta);
	else
		return (abs(delta) - queue_len);
}
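/*
 * Note on the arithmetic above: pi and ci are allowed to run up to twice
 * the queue length before wrapping (see hl_hw_queue_add_ptr()), so a full
 * queue (delta == queue_len) is distinguishable from an empty one
 * (delta == 0). With an illustrative queue length of 256 (value chosen
 * only for the example):
 *   pi = 3,   ci = 3   -> delta = 0    -> 256 free slots (empty)
 *   pi = 259, ci = 3   -> delta = 256  -> 0 free slots (full)
 *   pi = 10,  ci = 500 -> delta = -490 -> 490 - 256 = 234 free slots
 */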

void hl_int_hw_queue_update_ci(struct hl_cs *cs)
{
	struct hl_device *hdev = cs->ctx->hdev;
	struct hl_hw_queue *q;
	int i;

	if (hdev->disabled)
		return;

	q = &hdev->kernel_queues[0];
	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
		if (q->queue_type == QUEUE_TYPE_INT)
			atomic_add(cs->jobs_in_queue_cnt[i], &q->ci);
	}
}

/*
 * ext_and_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a
 *                                H/W queue.
 * @hdev: pointer to habanalabs device structure
 * @q: pointer to habanalabs queue structure
 * @ctl: BD's control word
 * @len: BD's length
 * @ptr: BD's pointer
 *
 * This function assumes there is enough space on the queue to submit a new
 * BD to it. It initializes the next BD and calls the device specific
 * function to set the pi (and doorbell)
 *
 * This function must be called when the scheduler mutex is taken
 *
 */
static void ext_and_hw_queue_submit_bd(struct hl_device *hdev,
			struct hl_hw_queue *q, u32 ctl, u32 len, u64 ptr)
{
	struct hl_bd *bd;

	bd = q->kernel_address;
	bd += hl_pi_2_offset(q->pi);
	bd->ctl = cpu_to_le32(ctl);
	bd->len = cpu_to_le32(len);
	bd->ptr = cpu_to_le64(ptr);

	q->pi = hl_queue_inc_ptr(q->pi);
	hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
}

/*
 * ext_queue_sanity_checks - perform some sanity checks on external queue
 *
 * @hdev : pointer to hl_device structure
 * @q : pointer to hl_hw_queue structure
 * @num_of_entries : how many entries to check for space
 * @reserve_cq_entry : whether to reserve an entry in the cq
 *
 * H/W queues spinlock should be taken before calling this function
 *
 * Perform the following:
 * - Make sure we have enough space in the h/w queue
 * - Make sure we have enough space in the completion queue
 * - Reserve space in the completion queue (needs to be reversed if there
 *   is a failure down the road before the actual submission of work). Only
 *   do this action if reserve_cq_entry is true
 *
 */
static int ext_queue_sanity_checks(struct hl_device *hdev,
				struct hl_hw_queue *q, int num_of_entries,
				bool reserve_cq_entry)
{
	atomic_t *free_slots =
			&hdev->completion_queue[q->cq_id].free_slots_cnt;
	int free_slots_cnt;

	/* Check we have enough space in the queue */
	free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);

	if (free_slots_cnt < num_of_entries) {
		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
			q->hw_queue_id, num_of_entries);
		return -EAGAIN;
	}

	if (reserve_cq_entry) {
		/*
		 * Check we have enough space in the completion queue.
		 * Subtract num_of_entries from the free slots counter; if the
		 * result is negative, the CQ doesn't have enough room for the
		 * new CBs and we won't get an ack on their completion, so the
		 * reservation is rolled back and -EAGAIN is returned.
		 * atomic_add_negative() returns true if the updated counter
		 * value is negative.
		 */
		if (atomic_add_negative(num_of_entries * -1, free_slots)) {
			dev_dbg(hdev->dev, "No space for %d on CQ %d\n",
				num_of_entries, q->hw_queue_id);
			atomic_add(num_of_entries, free_slots);
			return -EAGAIN;
		}
	}

	return 0;
}
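/*
 * Usage note: hl_hw_queue_schedule_cs() calls the above with
 * reserve_cq_entry == true, so every external-queue job also takes a CQ
 * credit, while hl_hw_queue_send_cb_no_cmpl() passes false because its CB
 * must not generate a completion entry. Credits reserved here for a CS
 * that later fails its checks are returned by the unroll path in
 * hl_hw_queue_schedule_cs(); on the success path they are expected to be
 * released when the corresponding completions are processed.
 */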

/*
 * int_queue_sanity_checks - perform some sanity checks on internal queue
 *
 * @hdev : pointer to hl_device structure
 * @q : pointer to hl_hw_queue structure
 * @num_of_entries : how many entries to check for space
 *
 * H/W queues spinlock should be taken before calling this function
 *
 * Perform the following:
 * - Make sure we have enough space in the h/w queue
 *
 */
static int int_queue_sanity_checks(struct hl_device *hdev,
					struct hl_hw_queue *q,
					int num_of_entries)
{
	int free_slots_cnt;

	if (num_of_entries > q->int_queue_len) {
		dev_err(hdev->dev,
			"Cannot populate queue %u with %u jobs\n",
			q->hw_queue_id, num_of_entries);
		return -ENOMEM;
	}

	/* Check we have enough space in the queue */
	free_slots_cnt = queue_free_slots(q, q->int_queue_len);

	if (free_slots_cnt < num_of_entries) {
		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
			q->hw_queue_id, num_of_entries);
		return -EAGAIN;
	}

	return 0;
}

/*
 * hw_queue_sanity_checks() - Make sure we have enough space in the h/w queue
 * @hdev: Pointer to hl_device structure.
 * @q: Pointer to hl_hw_queue structure.
 * @num_of_entries: How many entries to check for space.
 *
 * Notice: We do not reserve queue entries so this function mustn't be called
 * more than once per CS for the same queue
 *
 */
static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q,
					int num_of_entries)
{
	int free_slots_cnt;

	/* Check we have enough space in the queue */
	free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);

	if (free_slots_cnt < num_of_entries) {
		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
			q->hw_queue_id, num_of_entries);
		return -EAGAIN;
	}

	return 0;
}
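/*
 * The three *_queue_sanity_checks() helpers above differ as follows:
 * external queues may also reserve completion queue credits, internal
 * queues are bounded by the per-queue int_queue_len, and H/W queues only
 * verify PQ space without reserving anything - which is why
 * hw_queue_sanity_checks() must not be called more than once per CS for
 * the same queue.
 */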

/*
 * hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: ID of the queue to send the CB to
 * @cb_size: size of CB
 * @cb_ptr: pointer to CB location
 *
 * This function sends a single CB, that must NOT generate a completion entry
 *
 */
int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
				u32 cb_size, u64 cb_ptr)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
	int rc = 0;

	/*
	 * The CPU queue is a synchronous queue with an effective depth of
	 * a single entry (although it is allocated with room for multiple
	 * entries). Therefore, there is a different lock, called
	 * send_cpu_message_lock, that serializes accesses to the CPU queue.
	 * As a result, we don't need to lock the access to the entire H/W
	 * queues module when submitting a JOB to the CPU queue.
	 */
	if (q->queue_type != QUEUE_TYPE_CPU)
		hdev->asic_funcs->hw_queues_lock(hdev);

	if (hdev->disabled) {
		rc = -EPERM;
		goto out;
	}

	/*
	 * For queues of the H/W type, this function is only called during the
	 * init phase, when the queues are empty and being tested, so there is
	 * no need for sanity checks.
	 */
	if (q->queue_type != QUEUE_TYPE_HW) {
		rc = ext_queue_sanity_checks(hdev, q, 1, false);
		if (rc)
			goto out;
	}

	ext_and_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);

out:
	if (q->queue_type != QUEUE_TYPE_CPU)
		hdev->asic_funcs->hw_queues_unlock(hdev);

	return rc;
}

/*
 * ext_queue_schedule_job - submit a JOB to an external queue
 *
 * @job: pointer to the job that needs to be submitted to the queue
 *
 * This function must be called when the scheduler mutex is taken
 *
 */
static void ext_queue_schedule_job(struct hl_cs_job *job)
{
	struct hl_device *hdev = job->cs->ctx->hdev;
	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
	struct hl_cq_entry cq_pkt;
	struct hl_cq *cq;
	u64 cq_addr;
	struct hl_cb *cb;
	u32 ctl;
	u32 len;
	u64 ptr;

	/*
	 * Update the JOB ID inside the BD CTL so the device will know what
	 * to write in the completion queue
	 */
	ctl = ((q->pi << BD_CTL_SHADOW_INDEX_SHIFT) & BD_CTL_SHADOW_INDEX_MASK);

	cb = job->patched_cb;
	len = job->job_cb_size;
	ptr = cb->bus_address;

	cq_pkt.data = cpu_to_le32(
			((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
				& CQ_ENTRY_SHADOW_INDEX_MASK) |
			FIELD_PREP(CQ_ENTRY_SHADOW_INDEX_VALID_MASK, 1) |
			FIELD_PREP(CQ_ENTRY_READY_MASK, 1));

	/*
	 * No need to protect pi_offset because scheduling to the
	 * H/W queues is done under the scheduler mutex
	 *
	 * No need to check if CQ is full because it was already
	 * checked in ext_queue_sanity_checks
	 */
	cq = &hdev->completion_queue[q->cq_id];
	cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry);

	hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len,
						cq_addr,
						le32_to_cpu(cq_pkt.data),
						q->msi_vec,
						job->contains_dma_pkt);

	q->shadow_queue[hl_pi_2_offset(q->pi)] = job;

	cq->pi = hl_cq_inc_ptr(cq->pi);

	ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
}
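/*
 * Note on the shadow queue: the BD control word and the prepared CQ entry
 * above both carry the current pi as a "shadow index", and the job pointer
 * is stored at the matching slot in q->shadow_queue[] just before
 * submission. When the completion for this BD arrives, that index is
 * presumably what lets the completion handler locate and finish the job.
 */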

/*
 * int_queue_schedule_job - submit a JOB to an internal queue
 *
 * @job: pointer to the job that needs to be submitted to the queue
 *
 * This function must be called when the scheduler mutex is taken
 *
 */
static void int_queue_schedule_job(struct hl_cs_job *job)
{
	struct hl_device *hdev = job->cs->ctx->hdev;
	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
	struct hl_bd bd;
	__le64 *pi;

	bd.ctl = 0;
	bd.len = cpu_to_le32(job->job_cb_size);
	bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb);

	pi = q->kernel_address + (q->pi & (q->int_queue_len - 1)) * sizeof(bd);

	q->pi++;
	q->pi &= ((q->int_queue_len << 1) - 1);

	hdev->asic_funcs->pqe_write(hdev, pi, &bd);

	hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
}

/*
 * hw_queue_schedule_job - submit a JOB to a H/W queue
 *
 * @job: pointer to the job that needs to be submitted to the queue
 *
 * This function must be called when the scheduler mutex is taken
 *
 */
static void hw_queue_schedule_job(struct hl_cs_job *job)
{
	struct hl_device *hdev = job->cs->ctx->hdev;
	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
	u64 ptr;
	u32 offset, ctl, len;

	/*
	 * Upon PQE completion, COMP_DATA is used as the write data to the
	 * completion queue (QMAN HBW message), and COMP_OFFSET is used as the
	 * write address offset in the SM block (QMAN LBW message).
	 * The write address offset is calculated as "COMP_OFFSET << 2".
	 */
	offset = job->cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
	ctl = ((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) |
		((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK);

	len = job->job_cb_size;

	/*
	 * A patched CB is created only if a user CB was allocated by driver and
	 * MMU is disabled. If MMU is enabled, the user CB should be used
	 * instead. If the user CB wasn't allocated by driver, assume that it
	 * holds an address.
	 */
	if (job->patched_cb)
		ptr = job->patched_cb->bus_address;
	else if (job->is_kernel_allocated_cb)
		ptr = job->user_cb->bus_address;
	else
		ptr = (u64) (uintptr_t) job->user_cb;

	ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
}

/*
 * init_signal_wait_cs - initialize a signal/wait CS
 * @cs: pointer to the signal/wait CS
 *
 * H/W queues spinlock should be taken before calling this function
 */
static void init_signal_wait_cs(struct hl_cs *cs)
{
	struct hl_ctx *ctx = cs->ctx;
	struct hl_device *hdev = ctx->hdev;
	struct hl_hw_queue *hw_queue;
	struct hl_cs_compl *cs_cmpl =
			container_of(cs->fence, struct hl_cs_compl, base_fence);

	struct hl_hw_sob *hw_sob;
	struct hl_cs_job *job;
	u32 q_idx;

	/* There is only one job in a signal/wait CS */
	job = list_first_entry(&cs->job_list, struct hl_cs_job,
				cs_node);
	q_idx = job->hw_queue_id;
	hw_queue = &hdev->kernel_queues[q_idx];

	if (cs->type & CS_TYPE_SIGNAL) {
		hw_sob = &hw_queue->hw_sob[hw_queue->curr_sob_offset];

		cs_cmpl->hw_sob = hw_sob;
		cs_cmpl->sob_val = hw_queue->next_sob_val++;

		dev_dbg(hdev->dev,
			"generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
			cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);

		hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
						cs_cmpl->hw_sob->sob_id);

		kref_get(&hw_sob->kref);

		/* check for wraparound */
		if (hw_queue->next_sob_val == HL_MAX_SOB_VAL) {
			/*
			 * Decrement as we reached the max value.
			 * The release function won't be called here as we've
			 * just incremented the refcount.
			 */
			kref_put(&hw_sob->kref, hl_sob_reset_error);
			hw_queue->next_sob_val = 1;
			/* only two SOBs are currently in use */
			hw_queue->curr_sob_offset =
					(hw_queue->curr_sob_offset + 1) %
						HL_RSVD_SOBS_IN_USE;

			dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
				hw_queue->curr_sob_offset, q_idx);
		}
	} else if (cs->type & CS_TYPE_WAIT) {
		struct hl_cs_compl *signal_cs_cmpl;

		signal_cs_cmpl = container_of(cs->signal_fence,
						struct hl_cs_compl,
						base_fence);

		/* copy the SOB id and value of the signal CS */
		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;

		dev_dbg(hdev->dev,
			"generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n",
			cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
			hw_queue->base_mon_id, q_idx);

		hdev->asic_funcs->gen_wait_cb(hdev, job->patched_cb,
						cs_cmpl->hw_sob->sob_id,
						cs_cmpl->sob_val,
						hw_queue->base_mon_id,
						q_idx);

		kref_get(&cs_cmpl->hw_sob->kref);
		/*
		 * Must put the signal fence after the SOB refcnt increment so
		 * the SOB refcnt won't turn 0 and reset the SOB before the
		 * wait CS is submitted.
		 */
		mb();
		hl_fence_put(cs->signal_fence);
		cs->signal_fence = NULL;
	}
}
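/*
 * SOB lifecycle summary for the signal/wait flow above: a signal CS takes
 * a reference on the currently active SOB and advances next_sob_val; when
 * the value reaches HL_MAX_SOB_VAL, the reference just taken is dropped,
 * next_sob_val is reset to 1 and the queue switches to the next reserved
 * SOB (only HL_RSVD_SOBS_IN_USE of them are cycled). A wait CS copies the
 * SOB id and target value from the signal CS it depends on, takes its own
 * reference on that SOB, and arms a monitor (base_mon_id) against it.
 */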

/*
 * hl_hw_queue_schedule_cs - schedule a command submission
 * @cs: pointer to the CS
 */
int hl_hw_queue_schedule_cs(struct hl_cs *cs)
{
	struct hl_ctx *ctx = cs->ctx;
	struct hl_device *hdev = ctx->hdev;
	struct hl_cs_job *job, *tmp;
	struct hl_hw_queue *q;
	u32 max_queues;
	int rc = 0, i, cq_cnt;

	hdev->asic_funcs->hw_queues_lock(hdev);

	if (hl_device_disabled_or_in_reset(hdev)) {
		ctx->cs_counters.device_in_reset_drop_cnt++;
		dev_err(hdev->dev,
			"device is disabled or in reset, CS rejected!\n");
		rc = -EPERM;
		goto out;
	}

	max_queues = hdev->asic_prop.max_queues;

	q = &hdev->kernel_queues[0];
	for (i = 0, cq_cnt = 0 ; i < max_queues ; i++, q++) {
		if (cs->jobs_in_queue_cnt[i]) {
			switch (q->queue_type) {
			case QUEUE_TYPE_EXT:
				rc = ext_queue_sanity_checks(hdev, q,
						cs->jobs_in_queue_cnt[i], true);
				break;
			case QUEUE_TYPE_INT:
				rc = int_queue_sanity_checks(hdev, q,
						cs->jobs_in_queue_cnt[i]);
				break;
			case QUEUE_TYPE_HW:
				rc = hw_queue_sanity_checks(hdev, q,
						cs->jobs_in_queue_cnt[i]);
				break;
			default:
				dev_err(hdev->dev, "Queue type %d is invalid\n",
					q->queue_type);
				rc = -EINVAL;
				break;
			}

			if (rc) {
				ctx->cs_counters.queue_full_drop_cnt++;
				goto unroll_cq_resv;
			}

			if (q->queue_type == QUEUE_TYPE_EXT)
				cq_cnt++;
		}
	}

	if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT))
		init_signal_wait_cs(cs);

	spin_lock(&hdev->hw_queues_mirror_lock);
	list_add_tail(&cs->mirror_node, &hdev->hw_queues_mirror_list);

	/* Queue TDR if the CS is the first entry and if timeout is wanted */
	if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) &&
			(list_first_entry(&hdev->hw_queues_mirror_list,
					struct hl_cs, mirror_node) == cs)) {
		cs->tdr_active = true;
		schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies);
		spin_unlock(&hdev->hw_queues_mirror_lock);
	} else {
		spin_unlock(&hdev->hw_queues_mirror_lock);
	}

	if (!hdev->cs_active_cnt++) {
		struct hl_device_idle_busy_ts *ts;

		ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx];
		ts->busy_to_idle_ts = ktime_set(0, 0);
		ts->idle_to_busy_ts = ktime_get();
	}

	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		switch (job->queue_type) {
		case QUEUE_TYPE_EXT:
			ext_queue_schedule_job(job);
			break;
		case QUEUE_TYPE_INT:
			int_queue_schedule_job(job);
			break;
		case QUEUE_TYPE_HW:
			hw_queue_schedule_job(job);
			break;
		default:
			break;
		}

	cs->submitted = true;

	goto out;

unroll_cq_resv:
	q = &hdev->kernel_queues[0];
	for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) {
		if ((q->queue_type == QUEUE_TYPE_EXT) &&
				(cs->jobs_in_queue_cnt[i])) {
			atomic_t *free_slots =
				&hdev->completion_queue[i].free_slots_cnt;
			atomic_add(cs->jobs_in_queue_cnt[i], free_slots);
			cq_cnt--;
		}
	}

out:
	hdev->asic_funcs->hw_queues_unlock(hdev);

	return rc;
}
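/*
 * Locking note for hl_hw_queue_schedule_cs() above: the per-queue sanity
 * checks and the actual job submission all run under hw_queues_lock, so
 * the check-then-submit sequence is atomic with respect to other command
 * submissions. The TDR work is armed only for the CS at the head of the
 * mirror list; later CSs are expected to inherit the timeout when they
 * reach the head (handled by the CS completion code, outside this file).
 */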

/*
 * hl_hw_queue_inc_ci_kernel - increment ci for kernel's queue
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: ID of the queue whose ci should be incremented
 */
void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];

	atomic_inc(&q->ci);
}

static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
					bool is_cpu_queue)
{
	void *p;
	int rc;

	if (is_cpu_queue)
		p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
							HL_QUEUE_SIZE_IN_BYTES,
							&q->bus_address);
	else
		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
						HL_QUEUE_SIZE_IN_BYTES,
						&q->bus_address,
						GFP_KERNEL | __GFP_ZERO);
	if (!p)
		return -ENOMEM;

	q->kernel_address = p;

	q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH,
					sizeof(*q->shadow_queue),
					GFP_KERNEL);
	if (!q->shadow_queue) {
		dev_err(hdev->dev,
			"Failed to allocate shadow queue for H/W queue %d\n",
			q->hw_queue_id);
		rc = -ENOMEM;
		goto free_queue;
	}

	/* Make sure read/write pointers are initialized to start of queue */
	atomic_set(&q->ci, 0);
	q->pi = 0;

	return 0;

free_queue:
	if (is_cpu_queue)
		hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
					HL_QUEUE_SIZE_IN_BYTES,
					q->kernel_address);
	else
		hdev->asic_funcs->asic_dma_free_coherent(hdev,
					HL_QUEUE_SIZE_IN_BYTES,
					q->kernel_address,
					q->bus_address);

	return rc;
}

static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	void *p;

	p = hdev->asic_funcs->get_int_queue_base(hdev, q->hw_queue_id,
					&q->bus_address, &q->int_queue_len);
	if (!p) {
		dev_err(hdev->dev,
			"Failed to get base address for internal queue %d\n",
			q->hw_queue_id);
		return -EFAULT;
	}

	q->kernel_address = p;
	q->pi = 0;
	atomic_set(&q->ci, 0);

	return 0;
}

static int cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	return ext_and_cpu_queue_init(hdev, q, true);
}

static int ext_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	return ext_and_cpu_queue_init(hdev, q, false);
}

static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	void *p;

	p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
						HL_QUEUE_SIZE_IN_BYTES,
						&q->bus_address,
						GFP_KERNEL | __GFP_ZERO);
	if (!p)
		return -ENOMEM;

	q->kernel_address = p;

	/* Make sure read/write pointers are initialized to start of queue */
	atomic_set(&q->ci, 0);
	q->pi = 0;

	return 0;
}
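/*
 * Each queue that supports sync stream gets a private window of
 * HL_RSVD_SOBS sync objects and HL_RSVD_MONS monitors, carved out of the
 * ranges advertised in the ASIC properties. The window is chosen by the
 * order in which such queues are initialized (sync_stream_queue_idx), as
 * set up by sync_stream_queue_init() below.
 */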

static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
{
	struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_hw_sob *hw_sob;
	int sob, queue_idx = hdev->sync_stream_queue_idx++;

	hw_queue->base_sob_id =
		prop->sync_stream_first_sob + queue_idx * HL_RSVD_SOBS;
	hw_queue->base_mon_id =
		prop->sync_stream_first_mon + queue_idx * HL_RSVD_MONS;
	hw_queue->next_sob_val = 1;
	hw_queue->curr_sob_offset = 0;

	for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
		hw_sob = &hw_queue->hw_sob[sob];
		hw_sob->hdev = hdev;
		hw_sob->sob_id = hw_queue->base_sob_id + sob;
		hw_sob->q_idx = q_idx;
		kref_init(&hw_sob->kref);
	}
}

static void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx)
{
	struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];

	/*
	 * In case we got here due to a stuck CS, the refcnt might be bigger
	 * than 1 and therefore we reset it.
	 */
	kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref);
	hw_queue->curr_sob_offset = 0;
	hw_queue->next_sob_val = 1;
}

/*
 * queue_init - main initialization function for H/W queue object
 *
 * @hdev: pointer to hl_device device structure
 * @q: pointer to hl_hw_queue queue structure
 * @hw_queue_id: The id of the H/W queue
 *
 * Allocate dma-able memory for the queue and initialize fields
 * Returns 0 on success
 */
static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
			u32 hw_queue_id)
{
	int rc;

	q->hw_queue_id = hw_queue_id;

	switch (q->queue_type) {
	case QUEUE_TYPE_EXT:
		rc = ext_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_INT:
		rc = int_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_CPU:
		rc = cpu_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_HW:
		rc = hw_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_NA:
		q->valid = 0;
		return 0;
	default:
		dev_crit(hdev->dev, "wrong queue type %d during init\n",
			q->queue_type);
		rc = -EINVAL;
		break;
	}

	if (q->supports_sync_stream)
		sync_stream_queue_init(hdev, q->hw_queue_id);

	if (rc)
		return rc;

	q->valid = 1;

	return 0;
}

/*
 * queue_fini - destroy queue
 *
 * @hdev: pointer to hl_device device structure
 * @q: pointer to hl_hw_queue queue structure
 *
 * Free the queue memory
 */
static void queue_fini(struct hl_device *hdev, struct hl_hw_queue *q)
{
	if (!q->valid)
		return;

	/*
	 * If we arrived here, there are no jobs waiting on this queue
	 * so we can safely remove it.
	 * This is because this function can only be called when:
	 * 1. Either a context is deleted, which only can occur if all its
	 *    jobs were finished
	 * 2. A context wasn't able to be created due to failure or timeout,
	 *    which means there are no jobs on the queue yet
	 *
	 * The only exceptions are the queues of the kernel context, but
	 * if they are being destroyed, it means that the entire module is
	 * being removed. If the module is removed, it means there is no open
	 * user context. It also means that if a job was submitted by
	 * the kernel driver (e.g. context creation), the job itself was
	 * released by the kernel driver when a timeout occurred on its
	 * completion. Thus, we don't need to release it again.
	 */

	if (q->queue_type == QUEUE_TYPE_INT)
		return;

	kfree(q->shadow_queue);

	if (q->queue_type == QUEUE_TYPE_CPU)
		hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
					HL_QUEUE_SIZE_IN_BYTES,
					q->kernel_address);
	else
		hdev->asic_funcs->asic_dma_free_coherent(hdev,
					HL_QUEUE_SIZE_IN_BYTES,
					q->kernel_address,
					q->bus_address);
}

int hl_hw_queues_create(struct hl_device *hdev)
{
	struct asic_fixed_properties *asic = &hdev->asic_prop;
	struct hl_hw_queue *q;
	int i, rc, q_ready_cnt;

	hdev->kernel_queues = kcalloc(asic->max_queues,
				sizeof(*hdev->kernel_queues), GFP_KERNEL);

	if (!hdev->kernel_queues) {
		dev_err(hdev->dev, "Not enough memory for H/W queues\n");
		return -ENOMEM;
	}

	/* Initialize the H/W queues */
	for (i = 0, q_ready_cnt = 0, q = hdev->kernel_queues;
			i < asic->max_queues ; i++, q_ready_cnt++, q++) {

		q->queue_type = asic->hw_queues_props[i].type;
		q->supports_sync_stream =
				asic->hw_queues_props[i].supports_sync_stream;
		rc = queue_init(hdev, q, i);
		if (rc) {
			dev_err(hdev->dev,
				"failed to initialize queue %d\n", i);
			goto release_queues;
		}
	}

	return 0;

release_queues:
	for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++)
		queue_fini(hdev, q);

	kfree(hdev->kernel_queues);

	return rc;
}

void hl_hw_queues_destroy(struct hl_device *hdev)
{
	struct hl_hw_queue *q;
	u32 max_queues = hdev->asic_prop.max_queues;
	int i;

	for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++)
		queue_fini(hdev, q);

	kfree(hdev->kernel_queues);
}

void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset)
{
	struct hl_hw_queue *q;
	u32 max_queues = hdev->asic_prop.max_queues;
	int i;

	for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) {
		if ((!q->valid) ||
			((!hard_reset) && (q->queue_type == QUEUE_TYPE_CPU)))
			continue;
		q->pi = 0;
		atomic_set(&q->ci, 0);

		if (q->supports_sync_stream)
			sync_stream_queue_reset(hdev, q->hw_queue_id);
	}
}