// SPDX-License-Identifier: GPL-2.0
/*
 *	Precise Delay Loops for i386
 *
 *	Copyright (C) 1993 Linus Torvalds
 *	Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
 *	Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com>
 *
 *	The __delay function must _NOT_ be inlined as its execution time
 *	depends wildly on alignment on many x86 processors. The additional
 *	jump magic is needed to get the timing stable on all the CPU's
 *	we have to worry about.
 */
1462306a36Sopenharmony_ci
1562306a36Sopenharmony_ci#include <linux/export.h>
1662306a36Sopenharmony_ci#include <linux/sched.h>
1762306a36Sopenharmony_ci#include <linux/timex.h>
1862306a36Sopenharmony_ci#include <linux/preempt.h>
1962306a36Sopenharmony_ci#include <linux/delay.h>
2062306a36Sopenharmony_ci
2162306a36Sopenharmony_ci#include <asm/processor.h>
2262306a36Sopenharmony_ci#include <asm/delay.h>
2362306a36Sopenharmony_ci#include <asm/timer.h>
2462306a36Sopenharmony_ci#include <asm/mwait.h>
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_ci#ifdef CONFIG_SMP
2762306a36Sopenharmony_ci# include <asm/smp.h>
2862306a36Sopenharmony_ci#endif
2962306a36Sopenharmony_ci
/* Forward declaration: delay_loop is the boot-time default for delay_fn. */
static void delay_loop(u64 __loops);

/*
 * Calibration and selection of the delay mechanism happens only once
 * during boot.
 */
/* Active delay implementation; starts as the simple loop until calibrated. */
static void (*delay_fn)(u64) __ro_after_init = delay_loop;
/* Vendor-specific halt primitive used by delay_halt() (TPAUSE or MWAITX). */
static void (*delay_halt_fn)(u64 start, u64 cycles) __ro_after_init;
3862306a36Sopenharmony_ci
/* simple loop based delay: */
static void delay_loop(u64 __loops)
{
	/*
	 * The calibrated loop count always fits in an unsigned long;
	 * truncate the 64-bit interface value for the 32-bit-safe asm.
	 */
	unsigned long loops = (unsigned long)__loops;

	/*
	 * The jump chain and .align padding keep the hot loop at a fixed
	 * alignment so the timing stays stable across CPUs — see the
	 * comment at the top of this file.
	 */
	asm volatile(
		"	test %0,%0	\n"
		"	jz 3f		\n"
		"	jmp 1f		\n"

		".align 16		\n"
		"1:	jmp 2f		\n"

		".align 16		\n"
		"2:	dec %0		\n"
		"	jnz 2b		\n"
		/*
		 * Reached both on the zero-loops fast path and on loop
		 * exit; the register value is dead afterwards, so this
		 * final dec only contributes to the timing.
		 */
		"3:	dec %0		\n"

		: "+a" (loops)
		:
	);
}
6162306a36Sopenharmony_ci
/* TSC based delay: */
static void delay_tsc(u64 cycles)
{
	u64 bclock, now;
	int cpu;

	/*
	 * Disable preemption while sampling the TSC so we can detect a
	 * CPU migration (via smp_processor_id()) before re-reading it.
	 */
	preempt_disable();
	cpu = smp_processor_id();
	bclock = rdtsc_ordered();
	for (;;) {
		now = rdtsc_ordered();
		if ((now - bclock) >= cycles)
			break;

		/* Allow RT tasks to run */
		preempt_enable();
		rep_nop();	/* PAUSE: be polite to the sibling thread */
		preempt_disable();

		/*
		 * It is possible that we moved to another CPU, and
		 * since TSC's are per-cpu we need to calculate
		 * that. The delay must guarantee that we wait "at
		 * least" the amount of time. Being moved to another
		 * CPU could make the wait longer but we just need to
		 * make sure we waited long enough. Rebalance the
		 * counter for this CPU.
		 */
		if (unlikely(cpu != smp_processor_id())) {
			cycles -= (now - bclock);
			cpu = smp_processor_id();
			bclock = rdtsc_ordered();
		}
	}
	preempt_enable();
}
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_ci/*
10062306a36Sopenharmony_ci * On Intel the TPAUSE instruction waits until any of:
10162306a36Sopenharmony_ci * 1) the TSC counter exceeds the value provided in EDX:EAX
10262306a36Sopenharmony_ci * 2) global timeout in IA32_UMWAIT_CONTROL is exceeded
10362306a36Sopenharmony_ci * 3) an external interrupt occurs
10462306a36Sopenharmony_ci */
10562306a36Sopenharmony_cistatic void delay_halt_tpause(u64 start, u64 cycles)
10662306a36Sopenharmony_ci{
10762306a36Sopenharmony_ci	u64 until = start + cycles;
10862306a36Sopenharmony_ci	u32 eax, edx;
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_ci	eax = lower_32_bits(until);
11162306a36Sopenharmony_ci	edx = upper_32_bits(until);
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci	/*
11462306a36Sopenharmony_ci	 * Hard code the deeper (C0.2) sleep state because exit latency is
11562306a36Sopenharmony_ci	 * small compared to the "microseconds" that usleep() will delay.
11662306a36Sopenharmony_ci	 */
11762306a36Sopenharmony_ci	__tpause(TPAUSE_C02_STATE, edx, eax);
11862306a36Sopenharmony_ci}
11962306a36Sopenharmony_ci
12062306a36Sopenharmony_ci/*
12162306a36Sopenharmony_ci * On some AMD platforms, MWAITX has a configurable 32-bit timer, that
12262306a36Sopenharmony_ci * counts with TSC frequency. The input value is the number of TSC cycles
12362306a36Sopenharmony_ci * to wait. MWAITX will also exit when the timer expires.
12462306a36Sopenharmony_ci */
12562306a36Sopenharmony_cistatic void delay_halt_mwaitx(u64 unused, u64 cycles)
12662306a36Sopenharmony_ci{
12762306a36Sopenharmony_ci	u64 delay;
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_ci	delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles);
13062306a36Sopenharmony_ci	/*
13162306a36Sopenharmony_ci	 * Use cpu_tss_rw as a cacheline-aligned, seldomly accessed per-cpu
13262306a36Sopenharmony_ci	 * variable as the monitor target.
13362306a36Sopenharmony_ci	 */
13462306a36Sopenharmony_ci	 __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
13562306a36Sopenharmony_ci
13662306a36Sopenharmony_ci	/*
13762306a36Sopenharmony_ci	 * AMD, like Intel, supports the EAX hint and EAX=0xf means, do not
13862306a36Sopenharmony_ci	 * enter any deep C-state and we use it here in delay() to minimize
13962306a36Sopenharmony_ci	 * wakeup latency.
14062306a36Sopenharmony_ci	 */
14162306a36Sopenharmony_ci	__mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
14262306a36Sopenharmony_ci}
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci/*
14562306a36Sopenharmony_ci * Call a vendor specific function to delay for a given amount of time. Because
14662306a36Sopenharmony_ci * these functions may return earlier than requested, check for actual elapsed
14762306a36Sopenharmony_ci * time and call again until done.
14862306a36Sopenharmony_ci */
14962306a36Sopenharmony_cistatic void delay_halt(u64 __cycles)
15062306a36Sopenharmony_ci{
15162306a36Sopenharmony_ci	u64 start, end, cycles = __cycles;
15262306a36Sopenharmony_ci
15362306a36Sopenharmony_ci	/*
15462306a36Sopenharmony_ci	 * Timer value of 0 causes MWAITX to wait indefinitely, unless there
15562306a36Sopenharmony_ci	 * is a store on the memory monitored by MONITORX.
15662306a36Sopenharmony_ci	 */
15762306a36Sopenharmony_ci	if (!cycles)
15862306a36Sopenharmony_ci		return;
15962306a36Sopenharmony_ci
16062306a36Sopenharmony_ci	start = rdtsc_ordered();
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_ci	for (;;) {
16362306a36Sopenharmony_ci		delay_halt_fn(start, cycles);
16462306a36Sopenharmony_ci		end = rdtsc_ordered();
16562306a36Sopenharmony_ci
16662306a36Sopenharmony_ci		if (cycles <= end - start)
16762306a36Sopenharmony_ci			break;
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_ci		cycles -= end - start;
17062306a36Sopenharmony_ci		start = end;
17162306a36Sopenharmony_ci	}
17262306a36Sopenharmony_ci}
17362306a36Sopenharmony_ci
17462306a36Sopenharmony_civoid __init use_tsc_delay(void)
17562306a36Sopenharmony_ci{
17662306a36Sopenharmony_ci	if (delay_fn == delay_loop)
17762306a36Sopenharmony_ci		delay_fn = delay_tsc;
17862306a36Sopenharmony_ci}
17962306a36Sopenharmony_ci
/* Select the Intel TPAUSE-based halt delay. */
void __init use_tpause_delay(void)
{
	/* Install the halt primitive before pointing delay_fn at delay_halt. */
	delay_halt_fn = delay_halt_tpause;
	delay_fn = delay_halt;
}
18562306a36Sopenharmony_ci
/* Select the AMD MWAITX-based halt delay. */
void use_mwaitx_delay(void)
{
	/* Install the halt primitive before pointing delay_fn at delay_halt. */
	delay_halt_fn = delay_halt_mwaitx;
	delay_fn = delay_halt;
}
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_ciint read_current_timer(unsigned long *timer_val)
19362306a36Sopenharmony_ci{
19462306a36Sopenharmony_ci	if (delay_fn == delay_tsc) {
19562306a36Sopenharmony_ci		*timer_val = rdtsc();
19662306a36Sopenharmony_ci		return 0;
19762306a36Sopenharmony_ci	}
19862306a36Sopenharmony_ci	return -1;
19962306a36Sopenharmony_ci}
20062306a36Sopenharmony_ci
/* Delay for the given number of calibrated units via the selected backend. */
void __delay(unsigned long loops)
{
	delay_fn(loops);
}
EXPORT_SYMBOL(__delay);
20662306a36Sopenharmony_ci
/*
 * Scale a fixed-point delay request by loops-per-jiffy and delay.
 *
 * @xloops: delay in 2^-32 second units (callers pre-multiply by
 *	    2^32/10^6 for usecs or 2^32/10^9 for nsecs — see __udelay
 *	    and __ndelay below).
 *
 * noinline so the mull sequence and its timing stay out of callers.
 */
noinline void __const_udelay(unsigned long xloops)
{
	/* Fall back to the global loops_per_jiffy if this CPU's is unset. */
	unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy;
	int d0;

	/*
	 * Compute loops = (xloops * lpj * HZ) >> 32 via the high half of
	 * a 32x32->64 multiply. The x4 here pairs with HZ/4 below so that
	 * lpj * (HZ/4) fits in 32 bits — presumably to avoid overflowing
	 * the multiplicand.
	 */
	xloops *= 4;
	asm("mull %%edx"
		:"=d" (xloops), "=&a" (d0)
		:"1" (xloops), "0" (lpj * (HZ / 4)));

	/* +1 rounds up and guarantees a nonzero delay. */
	__delay(++xloops);
}
EXPORT_SYMBOL(__const_udelay);
22062306a36Sopenharmony_ci
/* Delay for @usecs microseconds, converted to __const_udelay()'s units. */
void __udelay(unsigned long usecs)
{
	__const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
}
EXPORT_SYMBOL(__udelay);
22662306a36Sopenharmony_ci
/* Delay for @nsecs nanoseconds, converted to __const_udelay()'s units. */
void __ndelay(unsigned long nsecs)
{
	__const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
}
EXPORT_SYMBOL(__ndelay);
232