// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"

static struct workqueue_struct *rxe_wq;

int rxe_alloc_wq(void)
{
	rxe_wq = alloc_workqueue("rxe_wq", WQ_UNBOUND, WQ_MAX_ACTIVE);
	if (!rxe_wq)
		return -ENOMEM;

	return 0;
}

void rxe_destroy_wq(void)
{
	destroy_workqueue(rxe_wq);
}

/* Check if the task is idle, i.e. not running, not scheduled in
 * the work queue and not draining. If so, reserve a slot in
 * do_task() by setting the state to busy and taking a qp
 * reference to cover the gap from now until the task finishes.
 * The state moves out of busy when the task returns a nonzero
 * value from do_task(). If the state is already busy it is raised
 * to armed to indicate to do_task() that an additional pass should
 * be made over the task.
 * Context: caller should hold task->lock.
 * Returns: true if the state transitioned from idle to busy, else false.
 */
static bool __reserve_if_idle(struct rxe_task *task)
{
	WARN_ON(rxe_read(task->qp) <= 0);

	if (task->state == TASK_STATE_IDLE) {
		rxe_get(task->qp);
		task->state = TASK_STATE_BUSY;
		task->num_sched++;
		return true;
	}

	if (task->state == TASK_STATE_BUSY)
		task->state = TASK_STATE_ARMED;

	return false;
}
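
/* A summary sketch of the task state machine implemented below
 * (illustrative; the TASK_STATE_* values are assumed to be declared
 * in rxe_task.h):
 *
 *	IDLE  --__reserve_if_idle()-->  BUSY	(task reserved to run)
 *	BUSY  --__reserve_if_idle()-->  ARMED	(scheduled while running)
 *	ARMED --do_task()----------->   BUSY	(make one more pass)
 *	BUSY  --do_task()----------->   IDLE	(all work consumed, or
 *						 iteration limit hit)
 *	busy states --rxe_cleanup_task()/rxe_disable_task()--> DRAINING
 *	DRAINING --do_task()-------->   DRAINED	(flushed, not executed)
 *	DRAINED  --rxe_cleanup_task()-> INVALID
 *	any state but INVALID --rxe_enable_task()--> IDLE
 */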

/* Check if the task is idle or drained and not currently
 * scheduled in the work queue. This routine is
 * called by rxe_cleanup_task or rxe_disable_task to
 * see if the queue is empty.
 * Context: caller should hold task->lock.
 * Returns: true if done, else false.
 */
static bool __is_done(struct rxe_task *task)
{
	if (work_pending(&task->work))
		return false;

	if (task->state == TASK_STATE_IDLE ||
	    task->state == TASK_STATE_DRAINED) {
		return true;
	}

	return false;
}

/* a locked version of __is_done */
static bool is_done(struct rxe_task *task)
{
	unsigned long flags;
	int done;

	spin_lock_irqsave(&task->lock, flags);
	done = __is_done(task);
	spin_unlock_irqrestore(&task->lock, flags);

	return done;
}
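
/* The task functions plugged in via rxe_init_task (requester,
 * completer, responder) are expected to follow the contract that
 * do_task relies on: perform a bounded unit of work and return 0
 * while more work remains, and return nonzero once everything has
 * been consumed. A minimal illustrative sketch; the helpers named
 * here are hypothetical, not part of this driver:
 *
 *	static int example_task_func(struct rxe_qp *qp)
 *	{
 *		if (!example_have_work(qp))
 *			return -EAGAIN;		// done: ends the loop
 *
 *		example_do_one_unit(qp);	// partial progress
 *		return 0;			// more work may remain
 *	}
 */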

/* do_task is a wrapper for the three tasks (requester,
 * completer, responder) and calls them in a loop until
 * they return a nonzero value. It is called either
 * directly by rxe_run_task or indirectly if rxe_sched_task
 * schedules the task. Callers must use __reserve_if_idle to
 * move the task to busy before calling or scheduling it.
 * The task can also be moved to drained or invalid
 * by calls to rxe_cleanup_task or rxe_disable_task.
 * In that case tasks which get here are not executed but
 * just flushed. The task functions are designed to check
 * whether there is work to do, do part of it, and return
 * zero until all the work has been consumed, at which point
 * they return a nonzero value.
 * The number of iterations per call is capped so that one
 * task cannot hold the cpu forever. If the limit is hit and
 * work remains, the task is rescheduled.
 */
static void do_task(struct rxe_task *task)
{
	unsigned int iterations;
	unsigned long flags;
	int resched = 0;
	int cont;
	int ret;

	WARN_ON(rxe_read(task->qp) <= 0);

	spin_lock_irqsave(&task->lock, flags);
	if (task->state >= TASK_STATE_DRAINED) {
		rxe_put(task->qp);
		task->num_done++;
		spin_unlock_irqrestore(&task->lock, flags);
		return;
	}
	spin_unlock_irqrestore(&task->lock, flags);

	do {
		iterations = RXE_MAX_ITERATIONS;
		cont = 0;

		do {
			ret = task->func(task->qp);
		} while (ret == 0 && iterations-- > 0);

		spin_lock_irqsave(&task->lock, flags);
		/* we're not done yet but we ran out of iterations.
		 * yield the cpu and reschedule the task
		 */
		if (!ret) {
			task->state = TASK_STATE_IDLE;
			resched = 1;
			goto exit;
		}

		switch (task->state) {
		case TASK_STATE_BUSY:
			task->state = TASK_STATE_IDLE;
			break;

		/* someone tried to schedule the task while we
		 * were running, keep going
		 */
		case TASK_STATE_ARMED:
			task->state = TASK_STATE_BUSY;
			cont = 1;
			break;

		case TASK_STATE_DRAINING:
			task->state = TASK_STATE_DRAINED;
			break;

		default:
			WARN_ON(1);
			rxe_dbg_qp(task->qp, "unexpected task state = %d",
				   task->state);
			task->state = TASK_STATE_IDLE;
		}

exit:
		if (!cont) {
			task->num_done++;
			if (WARN_ON(task->num_done != task->num_sched))
				rxe_dbg_qp(
					task->qp,
					"%ld tasks scheduled, %ld tasks done",
					task->num_sched, task->num_done);
		}
		spin_unlock_irqrestore(&task->lock, flags);
	} while (cont);

	task->ret = ret;

	if (resched)
		rxe_sched_task(task);

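	/* drop the qp reference taken by __reserve_if_idle(), which
	 * covered the gap from scheduling until the task finished
	 */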
	rxe_put(task->qp);
}

/* wrapper around do_task to fix the argument for the work queue */
static void do_work(struct work_struct *work)
{
	do_task(container_of(work, struct rxe_task, work));
}

int rxe_init_task(struct rxe_task *task, struct rxe_qp *qp,
		  int (*func)(struct rxe_qp *))
{
	WARN_ON(rxe_read(qp) <= 0);

	task->qp = qp;
	task->func = func;
	task->state = TASK_STATE_IDLE;
	spin_lock_init(&task->lock);
	INIT_WORK(&task->work, do_work);

	return 0;
}
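
/* A sketch of the expected lifecycle of a task, based on the comments
 * in this file (illustrative; the call sites live elsewhere in the
 * driver, e.g. rxe_do_qp_cleanup, and the field and function names
 * used here are assumptions):
 *
 *	rxe_init_task(&qp->req.task, qp, rxe_requester);
 *	...
 *	rxe_run_task(&qp->req.task);	// run inline if idle
 *	rxe_sched_task(&qp->req.task);	// or defer to rxe_wq
 *	...
 *	rxe_cleanup_task(&qp->req.task);	// drain, then invalidate
 */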

/* rxe_cleanup_task is only called from rxe_do_qp_cleanup in
 * process context. The qp has already been completed with no
 * remaining references. Once the queue is drained the
 * task is moved to invalid and returns. The qp cleanup
 * code then calls the task functions directly without
 * using the task struct to drain any late arriving packets
 * or work requests.
 */
void rxe_cleanup_task(struct rxe_task *task)
{
	unsigned long flags;

	spin_lock_irqsave(&task->lock, flags);
	if (!__is_done(task) && task->state < TASK_STATE_DRAINED) {
		task->state = TASK_STATE_DRAINING;
	} else {
		task->state = TASK_STATE_INVALID;
		spin_unlock_irqrestore(&task->lock, flags);
		return;
	}
	spin_unlock_irqrestore(&task->lock, flags);

	/* now the task cannot be scheduled or run, just wait
	 * for the previously scheduled tasks to finish.
	 */
	while (!is_done(task))
		cond_resched();

	spin_lock_irqsave(&task->lock, flags);
	task->state = TASK_STATE_INVALID;
	spin_unlock_irqrestore(&task->lock, flags);
}

/* run the task inline if it is currently idle.
 * do_task cannot be called while holding the lock.
 */
void rxe_run_task(struct rxe_task *task)
{
	unsigned long flags;
	bool run;

	WARN_ON(rxe_read(task->qp) <= 0);

	spin_lock_irqsave(&task->lock, flags);
	run = __reserve_if_idle(task);
	spin_unlock_irqrestore(&task->lock, flags);

	if (run)
		do_task(task);
}

/* schedule the task to run later as a work queue entry.
 * the queue_work call can be made while holding the lock.
 */
void rxe_sched_task(struct rxe_task *task)
{
	unsigned long flags;

	WARN_ON(rxe_read(task->qp) <= 0);

	spin_lock_irqsave(&task->lock, flags);
	if (__reserve_if_idle(task))
		queue_work(rxe_wq, &task->work);
	spin_unlock_irqrestore(&task->lock, flags);
}
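
/* Choosing between the two entry points above: rxe_run_task executes
 * the work synchronously in the caller's context, so the caller must
 * not hold task->lock; rxe_sched_task only queues the work on rxe_wq
 * and is therefore safe to call with the lock held.
 */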

/* rxe_disable_task/rxe_enable_task are only called from
 * rxe_modify_qp in process context. The task is moved
 * to the drained state by do_task.
 */
void rxe_disable_task(struct rxe_task *task)
{
	unsigned long flags;

	WARN_ON(rxe_read(task->qp) <= 0);

	spin_lock_irqsave(&task->lock, flags);
	if (!__is_done(task) && task->state < TASK_STATE_DRAINED) {
		task->state = TASK_STATE_DRAINING;
	} else {
		task->state = TASK_STATE_DRAINED;
		spin_unlock_irqrestore(&task->lock, flags);
		return;
	}
	spin_unlock_irqrestore(&task->lock, flags);

	while (!is_done(task))
		cond_resched();

	spin_lock_irqsave(&task->lock, flags);
	task->state = TASK_STATE_DRAINED;
	spin_unlock_irqrestore(&task->lock, flags);
}

void rxe_enable_task(struct rxe_task *task)
{
	unsigned long flags;

	WARN_ON(rxe_read(task->qp) <= 0);

	spin_lock_irqsave(&task->lock, flags);
	if (task->state == TASK_STATE_INVALID) {
		spin_unlock_irqrestore(&task->lock, flags);
		return;
	}

	task->state = TASK_STATE_IDLE;
	spin_unlock_irqrestore(&task->lock, flags);
}