162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (C) 2012 Regents of the University of California
462306a36Sopenharmony_ci */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include <linux/delay.h>
762306a36Sopenharmony_ci#include <linux/math.h>
862306a36Sopenharmony_ci#include <linux/param.h>
962306a36Sopenharmony_ci#include <linux/timex.h>
1062306a36Sopenharmony_ci#include <linux/types.h>
1162306a36Sopenharmony_ci#include <linux/export.h>
1262306a36Sopenharmony_ci
1362306a36Sopenharmony_ci#include <asm/processor.h>
1462306a36Sopenharmony_ci
1562306a36Sopenharmony_ci/*
 * This is copied from arch/arm/include/asm/delay.h
1762306a36Sopenharmony_ci *
1862306a36Sopenharmony_ci * Loop (or tick) based delay:
1962306a36Sopenharmony_ci *
2062306a36Sopenharmony_ci * loops = loops_per_jiffy * jiffies_per_sec * delay_us / us_per_sec
2162306a36Sopenharmony_ci *
2262306a36Sopenharmony_ci * where:
2362306a36Sopenharmony_ci *
2462306a36Sopenharmony_ci * jiffies_per_sec = HZ
2562306a36Sopenharmony_ci * us_per_sec = 1000000
2662306a36Sopenharmony_ci *
2762306a36Sopenharmony_ci * Therefore the constant part is HZ / 1000000 which is a small
2862306a36Sopenharmony_ci * fractional number. To make this usable with integer math, we
2962306a36Sopenharmony_ci * scale up this constant by 2^31, perform the actual multiplication,
3062306a36Sopenharmony_ci * and scale the result back down by 2^31 with a simple shift:
3162306a36Sopenharmony_ci *
3262306a36Sopenharmony_ci * loops = (loops_per_jiffy * delay_us * UDELAY_MULT) >> 31
3362306a36Sopenharmony_ci *
3462306a36Sopenharmony_ci * where:
3562306a36Sopenharmony_ci *
3662306a36Sopenharmony_ci * UDELAY_MULT = 2^31 * HZ / 1000000
3762306a36Sopenharmony_ci *             = (2^31 / 1000000) * HZ
3862306a36Sopenharmony_ci *             = 2147.483648 * HZ
3962306a36Sopenharmony_ci *             = 2147 * HZ + 483648 * HZ / 1000000
4062306a36Sopenharmony_ci *
4162306a36Sopenharmony_ci * 31 is the biggest scale shift value that won't overflow 32 bits for
4262306a36Sopenharmony_ci * delay_us * UDELAY_MULT assuming HZ <= 1000 and delay_us <= 2000.
4362306a36Sopenharmony_ci */
/* Upper bound, in microseconds, for the scaled-multiply path of udelay(). */
#define MAX_UDELAY_US	2000
/* Highest HZ the udelay scaling is specified for; enforced just below. */
#define MAX_UDELAY_HZ	1000
/*
 * 2^31 * HZ / 1000000, written as an integer part plus a fractional part so
 * that it can be evaluated with integer arithmetic only.
 */
#define UDELAY_MULT	((2147UL * HZ) + ((483648UL * HZ) / 1000000UL))
/* Right shift that removes the 2^31 scaling carried by UDELAY_MULT. */
#define UDELAY_SHIFT	31

/* Refuse to build when HZ exceeds the range the constants were derived for. */
#if MAX_UDELAY_HZ < HZ
#error "HZ > MAX_UDELAY_HZ"
#endif
5262306a36Sopenharmony_ci
5362306a36Sopenharmony_ci/*
5462306a36Sopenharmony_ci * RISC-V supports both UDELAY and NDELAY.  This is largely the same as above,
5562306a36Sopenharmony_ci * but with different constants.  I added 10 bits to the shift to get this, but
5662306a36Sopenharmony_ci * the result is that I need a 64-bit multiply, which is slow on 32-bit
5762306a36Sopenharmony_ci * platforms.
5862306a36Sopenharmony_ci *
5962306a36Sopenharmony_ci * NDELAY_MULT = 2^41 * HZ / 1000000000
6062306a36Sopenharmony_ci *             = (2^41 / 1000000000) * HZ
6162306a36Sopenharmony_ci *             = 2199.02325555 * HZ
6262306a36Sopenharmony_ci *             = 2199 * HZ + 23255550 * HZ / 1000000000
6362306a36Sopenharmony_ci *
6462306a36Sopenharmony_ci * The maximum here is to avoid 64-bit overflow, but it isn't checked as it
6562306a36Sopenharmony_ci * won't happen.
6662306a36Sopenharmony_ci */
/* Nominal upper bound on a nanosecond delay; nothing in this file checks it. */
#define MAX_NDELAY_NS	(1ULL << 42)
/* ndelay shares the HZ limit used for udelay. */
#define MAX_NDELAY_HZ	MAX_UDELAY_HZ
/*
 * 2^41 * HZ / 1000000000, written as an integer part plus a fractional part.
 * The ULL literals already make the whole expression unsigned long long.
 */
#define NDELAY_MULT	((2199ULL * HZ) + ((23255550ULL * HZ) / 1000000000ULL))
/* Right shift that removes the 2^41 scaling carried by NDELAY_MULT. */
#define NDELAY_SHIFT	41

/* Refuse to build when HZ exceeds the range the constants were derived for. */
#if MAX_NDELAY_HZ < HZ
#error "HZ > MAX_NDELAY_HZ"
#endif
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_civoid __delay(unsigned long cycles)
7762306a36Sopenharmony_ci{
7862306a36Sopenharmony_ci	u64 t0 = get_cycles();
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_ci	while ((unsigned long)(get_cycles() - t0) < cycles)
8162306a36Sopenharmony_ci		cpu_relax();
8262306a36Sopenharmony_ci}
8362306a36Sopenharmony_ciEXPORT_SYMBOL(__delay);
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_civoid udelay(unsigned long usecs)
8662306a36Sopenharmony_ci{
8762306a36Sopenharmony_ci	u64 ucycles = (u64)usecs * lpj_fine * UDELAY_MULT;
8862306a36Sopenharmony_ci	u64 n;
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_ci	if (unlikely(usecs > MAX_UDELAY_US)) {
9162306a36Sopenharmony_ci		n = (u64)usecs * riscv_timebase;
9262306a36Sopenharmony_ci		do_div(n, 1000000);
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_ci		__delay(n);
9562306a36Sopenharmony_ci		return;
9662306a36Sopenharmony_ci	}
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_ci	__delay(ucycles >> UDELAY_SHIFT);
9962306a36Sopenharmony_ci}
10062306a36Sopenharmony_ciEXPORT_SYMBOL(udelay);
10162306a36Sopenharmony_ci
10262306a36Sopenharmony_civoid ndelay(unsigned long nsecs)
10362306a36Sopenharmony_ci{
10462306a36Sopenharmony_ci	/*
10562306a36Sopenharmony_ci	 * This doesn't bother checking for overflow, as it won't happen (it's
10662306a36Sopenharmony_ci	 * an hour) of delay.
10762306a36Sopenharmony_ci	 */
10862306a36Sopenharmony_ci	unsigned long long ncycles = nsecs * lpj_fine * NDELAY_MULT;
10962306a36Sopenharmony_ci	__delay(ncycles >> NDELAY_SHIFT);
11062306a36Sopenharmony_ci}
11162306a36Sopenharmony_ciEXPORT_SYMBOL(ndelay);
112