162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
462306a36Sopenharmony_ci * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
562306a36Sopenharmony_ci */
662306a36Sopenharmony_ci
762306a36Sopenharmony_ci#include "rxe.h"
862306a36Sopenharmony_ci
962306a36Sopenharmony_cistatic struct workqueue_struct *rxe_wq;
1062306a36Sopenharmony_ci
1162306a36Sopenharmony_ciint rxe_alloc_wq(void)
1262306a36Sopenharmony_ci{
1362306a36Sopenharmony_ci	rxe_wq = alloc_workqueue("rxe_wq", WQ_UNBOUND, WQ_MAX_ACTIVE);
1462306a36Sopenharmony_ci	if (!rxe_wq)
1562306a36Sopenharmony_ci		return -ENOMEM;
1662306a36Sopenharmony_ci
1762306a36Sopenharmony_ci	return 0;
1862306a36Sopenharmony_ci}
1962306a36Sopenharmony_ci
/* Tear down the work queue created by rxe_alloc_wq().
 * destroy_workqueue() waits for any remaining queued work to finish
 * before freeing the queue.
 */
void rxe_destroy_wq(void)
{
	destroy_workqueue(rxe_wq);
}
2462306a36Sopenharmony_ci
2562306a36Sopenharmony_ci/* Check if task is idle i.e. not running, not scheduled in
2662306a36Sopenharmony_ci * work queue and not draining. If so move to busy to
2762306a36Sopenharmony_ci * reserve a slot in do_task() by setting to busy and taking
2862306a36Sopenharmony_ci * a qp reference to cover the gap from now until the task finishes.
2962306a36Sopenharmony_ci * state will move out of busy if task returns a non zero value
3062306a36Sopenharmony_ci * in do_task(). If state is already busy it is raised to armed
3162306a36Sopenharmony_ci * to indicate to do_task that additional pass should be made
3262306a36Sopenharmony_ci * over the task.
3362306a36Sopenharmony_ci * Context: caller should hold task->lock.
3462306a36Sopenharmony_ci * Returns: true if state transitioned from idle to busy else false.
3562306a36Sopenharmony_ci */
3662306a36Sopenharmony_cistatic bool __reserve_if_idle(struct rxe_task *task)
3762306a36Sopenharmony_ci{
3862306a36Sopenharmony_ci	WARN_ON(rxe_read(task->qp) <= 0);
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_ci	if (task->state == TASK_STATE_IDLE) {
4162306a36Sopenharmony_ci		rxe_get(task->qp);
4262306a36Sopenharmony_ci		task->state = TASK_STATE_BUSY;
4362306a36Sopenharmony_ci		task->num_sched++;
4462306a36Sopenharmony_ci		return true;
4562306a36Sopenharmony_ci	}
4662306a36Sopenharmony_ci
4762306a36Sopenharmony_ci	if (task->state == TASK_STATE_BUSY)
4862306a36Sopenharmony_ci		task->state = TASK_STATE_ARMED;
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_ci	return false;
5162306a36Sopenharmony_ci}
5262306a36Sopenharmony_ci
5362306a36Sopenharmony_ci/* check if task is idle or drained and not currently
5462306a36Sopenharmony_ci * scheduled in the work queue. This routine is
5562306a36Sopenharmony_ci * called by rxe_cleanup_task or rxe_disable_task to
5662306a36Sopenharmony_ci * see if the queue is empty.
5762306a36Sopenharmony_ci * Context: caller should hold task->lock.
5862306a36Sopenharmony_ci * Returns true if done else false.
5962306a36Sopenharmony_ci */
6062306a36Sopenharmony_cistatic bool __is_done(struct rxe_task *task)
6162306a36Sopenharmony_ci{
6262306a36Sopenharmony_ci	if (work_pending(&task->work))
6362306a36Sopenharmony_ci		return false;
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_ci	if (task->state == TASK_STATE_IDLE ||
6662306a36Sopenharmony_ci	    task->state == TASK_STATE_DRAINED) {
6762306a36Sopenharmony_ci		return true;
6862306a36Sopenharmony_ci	}
6962306a36Sopenharmony_ci
7062306a36Sopenharmony_ci	return false;
7162306a36Sopenharmony_ci}
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_ci/* a locked version of __is_done */
7462306a36Sopenharmony_cistatic bool is_done(struct rxe_task *task)
7562306a36Sopenharmony_ci{
7662306a36Sopenharmony_ci	unsigned long flags;
7762306a36Sopenharmony_ci	int done;
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_ci	spin_lock_irqsave(&task->lock, flags);
8062306a36Sopenharmony_ci	done = __is_done(task);
8162306a36Sopenharmony_ci	spin_unlock_irqrestore(&task->lock, flags);
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_ci	return done;
8462306a36Sopenharmony_ci}
8562306a36Sopenharmony_ci
/* do_task is a wrapper for the three tasks (requester,
 * completer, responder) and calls them in a loop until
 * they return a non-zero value. It is called either
 * directly by rxe_run_task or indirectly if rxe_sched_task
 * schedules the task. They must call __reserve_if_idle to
 * move the task to busy before calling or scheduling.
 * The task can also be moved to drained or invalid
 * by calls to rxe_cleanup_task or rxe_disable_task.
 * In that case tasks which get here are not executed but
 * just flushed. The tasks are designed to look to see if
 * there is work to do and then do part of it before returning
 * here with a return value of zero until all the work
 * has been consumed then it returns a non-zero value.
 * The number of times the task can be run is limited by
 * max iterations so one task cannot hold the cpu forever.
 * If the limit is hit and work remains the task is rescheduled.
 */
static void do_task(struct rxe_task *task)
{
	unsigned int iterations;
	unsigned long flags;
	int resched = 0;
	int cont;
	int ret;

	WARN_ON(rxe_read(task->qp) <= 0);

	/* task was moved to drained/invalid after being scheduled:
	 * drop the reference taken by __reserve_if_idle and flush
	 * without running the task function
	 */
	spin_lock_irqsave(&task->lock, flags);
	if (task->state >= TASK_STATE_DRAINED) {
		rxe_put(task->qp);
		task->num_done++;
		spin_unlock_irqrestore(&task->lock, flags);
		return;
	}
	spin_unlock_irqrestore(&task->lock, flags);

	do {
		iterations = RXE_MAX_ITERATIONS;
		cont = 0;

		/* task->func is called without the lock; it returns 0
		 * while work remains and nonzero when consumed
		 */
		do {
			ret = task->func(task->qp);
		} while (ret == 0 && iterations-- > 0);

		spin_lock_irqsave(&task->lock, flags);
		/* we're not done yet but we ran out of iterations.
		 * yield the cpu and reschedule the task
		 */
		if (!ret) {
			task->state = TASK_STATE_IDLE;
			resched = 1;
			goto exit;
		}

		switch (task->state) {
		case TASK_STATE_BUSY:
			task->state = TASK_STATE_IDLE;
			break;

		/* someone tried to schedule the task while we
		 * were running, keep going
		 */
		case TASK_STATE_ARMED:
			task->state = TASK_STATE_BUSY;
			cont = 1;
			break;

		case TASK_STATE_DRAINING:
			task->state = TASK_STATE_DRAINED;
			break;

		default:
			WARN_ON(1);
			rxe_dbg_qp(task->qp, "unexpected task state = %d",
				   task->state);
			task->state = TASK_STATE_IDLE;
		}

exit:
		/* bookkeeping: each reservation (num_sched) must be
		 * matched by exactly one completion (num_done)
		 */
		if (!cont) {
			task->num_done++;
			if (WARN_ON(task->num_done != task->num_sched))
				rxe_dbg_qp(
					task->qp,
					"%ld tasks scheduled, %ld tasks done",
					task->num_sched, task->num_done);
		}
		spin_unlock_irqrestore(&task->lock, flags);
	} while (cont);

	task->ret = ret;

	/* iteration budget exhausted with work left; queue another run */
	if (resched)
		rxe_sched_task(task);

	/* drop the reference taken by __reserve_if_idle */
	rxe_put(task->qp);
}
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci/* wrapper around do_task to fix argument for work queue */
18562306a36Sopenharmony_cistatic void do_work(struct work_struct *work)
18662306a36Sopenharmony_ci{
18762306a36Sopenharmony_ci	do_task(container_of(work, struct rxe_task, work));
18862306a36Sopenharmony_ci}
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_ciint rxe_init_task(struct rxe_task *task, struct rxe_qp *qp,
19162306a36Sopenharmony_ci		  int (*func)(struct rxe_qp *))
19262306a36Sopenharmony_ci{
19362306a36Sopenharmony_ci	WARN_ON(rxe_read(qp) <= 0);
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci	task->qp = qp;
19662306a36Sopenharmony_ci	task->func = func;
19762306a36Sopenharmony_ci	task->state = TASK_STATE_IDLE;
19862306a36Sopenharmony_ci	spin_lock_init(&task->lock);
19962306a36Sopenharmony_ci	INIT_WORK(&task->work, do_work);
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_ci	return 0;
20262306a36Sopenharmony_ci}
20362306a36Sopenharmony_ci
20462306a36Sopenharmony_ci/* rxe_cleanup_task is only called from rxe_do_qp_cleanup in
20562306a36Sopenharmony_ci * process context. The qp is already completed with no
20662306a36Sopenharmony_ci * remaining references. Once the queue is drained the
20762306a36Sopenharmony_ci * task is moved to invalid and returns. The qp cleanup
20862306a36Sopenharmony_ci * code then calls the task functions directly without
20962306a36Sopenharmony_ci * using the task struct to drain any late arriving packets
21062306a36Sopenharmony_ci * or work requests.
21162306a36Sopenharmony_ci */
21262306a36Sopenharmony_civoid rxe_cleanup_task(struct rxe_task *task)
21362306a36Sopenharmony_ci{
21462306a36Sopenharmony_ci	unsigned long flags;
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci	spin_lock_irqsave(&task->lock, flags);
21762306a36Sopenharmony_ci	if (!__is_done(task) && task->state < TASK_STATE_DRAINED) {
21862306a36Sopenharmony_ci		task->state = TASK_STATE_DRAINING;
21962306a36Sopenharmony_ci	} else {
22062306a36Sopenharmony_ci		task->state = TASK_STATE_INVALID;
22162306a36Sopenharmony_ci		spin_unlock_irqrestore(&task->lock, flags);
22262306a36Sopenharmony_ci		return;
22362306a36Sopenharmony_ci	}
22462306a36Sopenharmony_ci	spin_unlock_irqrestore(&task->lock, flags);
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_ci	/* now the task cannot be scheduled or run just wait
22762306a36Sopenharmony_ci	 * for the previously scheduled tasks to finish.
22862306a36Sopenharmony_ci	 */
22962306a36Sopenharmony_ci	while (!is_done(task))
23062306a36Sopenharmony_ci		cond_resched();
23162306a36Sopenharmony_ci
23262306a36Sopenharmony_ci	spin_lock_irqsave(&task->lock, flags);
23362306a36Sopenharmony_ci	task->state = TASK_STATE_INVALID;
23462306a36Sopenharmony_ci	spin_unlock_irqrestore(&task->lock, flags);
23562306a36Sopenharmony_ci}
23662306a36Sopenharmony_ci
23762306a36Sopenharmony_ci/* run the task inline if it is currently idle
23862306a36Sopenharmony_ci * cannot call do_task holding the lock
23962306a36Sopenharmony_ci */
24062306a36Sopenharmony_civoid rxe_run_task(struct rxe_task *task)
24162306a36Sopenharmony_ci{
24262306a36Sopenharmony_ci	unsigned long flags;
24362306a36Sopenharmony_ci	bool run;
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_ci	WARN_ON(rxe_read(task->qp) <= 0);
24662306a36Sopenharmony_ci
24762306a36Sopenharmony_ci	spin_lock_irqsave(&task->lock, flags);
24862306a36Sopenharmony_ci	run = __reserve_if_idle(task);
24962306a36Sopenharmony_ci	spin_unlock_irqrestore(&task->lock, flags);
25062306a36Sopenharmony_ci
25162306a36Sopenharmony_ci	if (run)
25262306a36Sopenharmony_ci		do_task(task);
25362306a36Sopenharmony_ci}
25462306a36Sopenharmony_ci
25562306a36Sopenharmony_ci/* schedule the task to run later as a work queue entry.
25662306a36Sopenharmony_ci * the queue_work call can be called holding
25762306a36Sopenharmony_ci * the lock.
25862306a36Sopenharmony_ci */
25962306a36Sopenharmony_civoid rxe_sched_task(struct rxe_task *task)
26062306a36Sopenharmony_ci{
26162306a36Sopenharmony_ci	unsigned long flags;
26262306a36Sopenharmony_ci
26362306a36Sopenharmony_ci	WARN_ON(rxe_read(task->qp) <= 0);
26462306a36Sopenharmony_ci
26562306a36Sopenharmony_ci	spin_lock_irqsave(&task->lock, flags);
26662306a36Sopenharmony_ci	if (__reserve_if_idle(task))
26762306a36Sopenharmony_ci		queue_work(rxe_wq, &task->work);
26862306a36Sopenharmony_ci	spin_unlock_irqrestore(&task->lock, flags);
26962306a36Sopenharmony_ci}
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ci/* rxe_disable/enable_task are only called from
27262306a36Sopenharmony_ci * rxe_modify_qp in process context. Task is moved
27362306a36Sopenharmony_ci * to the drained state by do_task.
27462306a36Sopenharmony_ci */
27562306a36Sopenharmony_civoid rxe_disable_task(struct rxe_task *task)
27662306a36Sopenharmony_ci{
27762306a36Sopenharmony_ci	unsigned long flags;
27862306a36Sopenharmony_ci
27962306a36Sopenharmony_ci	WARN_ON(rxe_read(task->qp) <= 0);
28062306a36Sopenharmony_ci
28162306a36Sopenharmony_ci	spin_lock_irqsave(&task->lock, flags);
28262306a36Sopenharmony_ci	if (!__is_done(task) && task->state < TASK_STATE_DRAINED) {
28362306a36Sopenharmony_ci		task->state = TASK_STATE_DRAINING;
28462306a36Sopenharmony_ci	} else {
28562306a36Sopenharmony_ci		task->state = TASK_STATE_DRAINED;
28662306a36Sopenharmony_ci		spin_unlock_irqrestore(&task->lock, flags);
28762306a36Sopenharmony_ci		return;
28862306a36Sopenharmony_ci	}
28962306a36Sopenharmony_ci	spin_unlock_irqrestore(&task->lock, flags);
29062306a36Sopenharmony_ci
29162306a36Sopenharmony_ci	while (!is_done(task))
29262306a36Sopenharmony_ci		cond_resched();
29362306a36Sopenharmony_ci
29462306a36Sopenharmony_ci	spin_lock_irqsave(&task->lock, flags);
29562306a36Sopenharmony_ci	task->state = TASK_STATE_DRAINED;
29662306a36Sopenharmony_ci	spin_unlock_irqrestore(&task->lock, flags);
29762306a36Sopenharmony_ci}
29862306a36Sopenharmony_ci
29962306a36Sopenharmony_civoid rxe_enable_task(struct rxe_task *task)
30062306a36Sopenharmony_ci{
30162306a36Sopenharmony_ci	unsigned long flags;
30262306a36Sopenharmony_ci
30362306a36Sopenharmony_ci	WARN_ON(rxe_read(task->qp) <= 0);
30462306a36Sopenharmony_ci
30562306a36Sopenharmony_ci	spin_lock_irqsave(&task->lock, flags);
30662306a36Sopenharmony_ci	if (task->state == TASK_STATE_INVALID) {
30762306a36Sopenharmony_ci		spin_unlock_irqrestore(&task->lock, flags);
30862306a36Sopenharmony_ci		return;
30962306a36Sopenharmony_ci	}
31062306a36Sopenharmony_ci
31162306a36Sopenharmony_ci	task->state = TASK_STATE_IDLE;
31262306a36Sopenharmony_ci	spin_unlock_irqrestore(&task->lock, flags);
31362306a36Sopenharmony_ci}
314