xref: /kernel/linux/linux-5.10/arch/riscv/lib/delay.c (revision 8c2ecf20)
18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Copyright (C) 2012 Regents of the University of California
48c2ecf20Sopenharmony_ci */
58c2ecf20Sopenharmony_ci
68c2ecf20Sopenharmony_ci#include <linux/delay.h>
78c2ecf20Sopenharmony_ci#include <linux/param.h>
88c2ecf20Sopenharmony_ci#include <linux/timex.h>
98c2ecf20Sopenharmony_ci#include <linux/export.h>
108c2ecf20Sopenharmony_ci
/*
 * This is copied from arch/arm/include/asm/delay.h
 *
 * Loop (or tick) based delay:
 *
 * loops = loops_per_jiffy * jiffies_per_sec * delay_us / us_per_sec
 *
 * where:
 *
 * jiffies_per_sec = HZ
 * us_per_sec = 1000000
 *
 * Therefore the constant part is HZ / 1000000 which is a small
 * fractional number. To make this usable with integer math, we
 * scale up this constant by 2^31, perform the actual multiplication,
 * and scale the result back down by 2^31 with a simple shift:
 *
 * loops = (loops_per_jiffy * delay_us * UDELAY_MULT) >> 31
 *
 * where:
 *
 * UDELAY_MULT = 2^31 * HZ / 1000000
 *             = (2^31 / 1000000) * HZ
 *             = 2147.483648 * HZ
 *             = 2147 * HZ + 483648 * HZ / 1000000
 *
 * 31 is the biggest scale shift value that won't overflow 32 bits for
 * delay_us * UDELAY_MULT assuming HZ <= 1000 and delay_us <= 2000.
 */
/* Limits under which the udelay() fixed-point scaling is valid. */
#define MAX_UDELAY_HZ	1000
#define MAX_UDELAY_US	2000

/* Fail the build if the configured HZ is above the supported limit. */
#if HZ > MAX_UDELAY_HZ
#error "HZ > MAX_UDELAY_HZ"
#endif

/*
 * Microseconds-to-loops scale factor, 2^31 * HZ / 1000000, split into an
 * integer part and a fractional part so it can be evaluated in integer math.
 * Results are scaled back down with a right shift by UDELAY_SHIFT.
 */
#define UDELAY_SHIFT	31
#define UDELAY_MULT	(2147UL * HZ + 483648UL * HZ / 1000000UL)

488c2ecf20Sopenharmony_ci
/*
 * RISC-V supports both UDELAY and NDELAY.  This is largely the same as above,
 * but with different constants.  I added 10 bits to the shift to get this, but
 * the result is that I need a 64-bit multiply, which is slow on 32-bit
 * platforms.
 *
 * NDELAY_MULT = 2^41 * HZ / 1000000000
 *             = (2^41 / 1000000000) * HZ
 *             = 2199.02325555 * HZ
 *             = 2199 * HZ + 23255550 * HZ / 1000000000
 *
 * The maximum here is to avoid 64-bit overflow, but it isn't checked as it
 * won't happen.
 */
/* The ndelay() scaling supports the same maximum HZ as udelay(). */
#define MAX_NDELAY_HZ	MAX_UDELAY_HZ

/* Fail the build if the configured HZ is above the supported limit. */
#if HZ > MAX_NDELAY_HZ
#error "HZ > MAX_NDELAY_HZ"
#endif

/* Stated upper bound for a nanosecond delay; ndelay() does not check it. */
#define MAX_NDELAY_NS   (1ULL << 42)

/*
 * Nanoseconds-to-loops scale factor, 2^41 * HZ / 1000000000, evaluated in
 * 64-bit integer math.  Results are scaled back down with a right shift by
 * NDELAY_SHIFT.
 */
#define NDELAY_SHIFT	41
#define NDELAY_MULT	((unsigned long long)(2199ULL * HZ + 23255550ULL * HZ / 1000000000ULL))

718c2ecf20Sopenharmony_ci
728c2ecf20Sopenharmony_civoid __delay(unsigned long cycles)
738c2ecf20Sopenharmony_ci{
748c2ecf20Sopenharmony_ci	u64 t0 = get_cycles();
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_ci	while ((unsigned long)(get_cycles() - t0) < cycles)
778c2ecf20Sopenharmony_ci		cpu_relax();
788c2ecf20Sopenharmony_ci}
798c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__delay);
808c2ecf20Sopenharmony_ci
818c2ecf20Sopenharmony_civoid udelay(unsigned long usecs)
828c2ecf20Sopenharmony_ci{
838c2ecf20Sopenharmony_ci	u64 ucycles = (u64)usecs * lpj_fine * UDELAY_MULT;
848c2ecf20Sopenharmony_ci	u64 n;
858c2ecf20Sopenharmony_ci
868c2ecf20Sopenharmony_ci	if (unlikely(usecs > MAX_UDELAY_US)) {
878c2ecf20Sopenharmony_ci		n = (u64)usecs * riscv_timebase;
888c2ecf20Sopenharmony_ci		do_div(n, 1000000);
898c2ecf20Sopenharmony_ci
908c2ecf20Sopenharmony_ci		__delay(n);
918c2ecf20Sopenharmony_ci		return;
928c2ecf20Sopenharmony_ci	}
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_ci	__delay(ucycles >> UDELAY_SHIFT);
958c2ecf20Sopenharmony_ci}
968c2ecf20Sopenharmony_ciEXPORT_SYMBOL(udelay);
978c2ecf20Sopenharmony_ci
988c2ecf20Sopenharmony_civoid ndelay(unsigned long nsecs)
998c2ecf20Sopenharmony_ci{
1008c2ecf20Sopenharmony_ci	/*
1018c2ecf20Sopenharmony_ci	 * This doesn't bother checking for overflow, as it won't happen (it's
1028c2ecf20Sopenharmony_ci	 * an hour) of delay.
1038c2ecf20Sopenharmony_ci	 */
1048c2ecf20Sopenharmony_ci	unsigned long long ncycles = nsecs * lpj_fine * NDELAY_MULT;
1058c2ecf20Sopenharmony_ci	__delay(ncycles >> NDELAY_SHIFT);
1068c2ecf20Sopenharmony_ci}
1078c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ndelay);
108