162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-only */
262306a36Sopenharmony_ci#ifndef __MIPS_ASM_SYNC_H__
362306a36Sopenharmony_ci#define __MIPS_ASM_SYNC_H__
462306a36Sopenharmony_ci
562306a36Sopenharmony_ci/*
662306a36Sopenharmony_ci * sync types are defined by the MIPS64 Instruction Set documentation in Volume
762306a36Sopenharmony_ci * II-A of the MIPS Architecture Reference Manual, which can be found here:
862306a36Sopenharmony_ci *
962306a36Sopenharmony_ci *   https://www.mips.com/?do-download=the-mips64-instruction-set-v6-06
1062306a36Sopenharmony_ci *
1162306a36Sopenharmony_ci * Two types of barrier are provided:
1262306a36Sopenharmony_ci *
1362306a36Sopenharmony_ci *   1) Completion barriers, which ensure that a memory operation has actually
1462306a36Sopenharmony_ci *      completed & often involve stalling the CPU pipeline to do so.
1562306a36Sopenharmony_ci *
1662306a36Sopenharmony_ci *   2) Ordering barriers, which only ensure that affected memory operations
1762306a36Sopenharmony_ci *      won't be reordered in the CPU pipeline in a manner that violates the
1862306a36Sopenharmony_ci *      restrictions imposed by the barrier.
1962306a36Sopenharmony_ci *
2062306a36Sopenharmony_ci * Ordering barriers can be more efficient than completion barriers, since:
2162306a36Sopenharmony_ci *
2262306a36Sopenharmony_ci *   a) Ordering barriers only require memory access instructions which precede
2362306a36Sopenharmony_ci *      them in program order (older instructions) to reach a point in the
2462306a36Sopenharmony_ci *      load/store datapath beyond which reordering is not possible before
2562306a36Sopenharmony_ci *      allowing memory access instructions which follow them (younger
2662306a36Sopenharmony_ci *      instructions) to be performed.  That is, older instructions don't
2762306a36Sopenharmony_ci *      actually need to complete - they just need to get far enough that all
2862306a36Sopenharmony_ci *      other coherent CPUs will observe their completion before they observe
2962306a36Sopenharmony_ci *      the effects of younger instructions.
3062306a36Sopenharmony_ci *
3162306a36Sopenharmony_ci *   b) Multiple variants of ordering barrier are provided which allow the
3262306a36Sopenharmony_ci *      effects to be restricted to different combinations of older or younger
3362306a36Sopenharmony_ci *      loads or stores. By way of example, if we only care that stores older
3462306a36Sopenharmony_ci *      than a barrier are observed prior to stores that are younger than a
3562306a36Sopenharmony_ci *      barrier & don't care about the ordering of loads then the 'wmb'
3662306a36Sopenharmony_ci *      ordering barrier can be used. Limiting the barrier's effects to stores
3762306a36Sopenharmony_ci *      allows loads to continue unaffected & potentially allows the CPU to
3862306a36Sopenharmony_ci *      make progress faster than if younger loads had to wait for older stores
3962306a36Sopenharmony_ci *      to complete.
4062306a36Sopenharmony_ci */
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_ci/*
4362306a36Sopenharmony_ci * No sync instruction at all; lets code nullify the effect of the __SYNC()
4462306a36Sopenharmony_ci * macro without lots of #ifdefery. ____SYNC() tests for it as a literal -1.
4562306a36Sopenharmony_ci */
4662306a36Sopenharmony_ci#define __SYNC_none	-1
4762306a36Sopenharmony_ci
4862306a36Sopenharmony_ci/*
4962306a36Sopenharmony_ci * A full completion barrier; all memory accesses appearing prior to this sync
5062306a36Sopenharmony_ci * instruction in program order must complete before any memory accesses
5162306a36Sopenharmony_ci * appearing after this sync instruction in program order.
5262306a36Sopenharmony_ci */
5362306a36Sopenharmony_ci#define __SYNC_full	0x00
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_ci/*
5662306a36Sopenharmony_ci * For now we use a full completion barrier to implement all sync types, until
5762306a36Sopenharmony_ci * we're satisfied that lightweight ordering barriers defined by MIPSr6 are
5862306a36Sopenharmony_ci * sufficient to uphold our desired memory model.
5962306a36Sopenharmony_ci */
6062306a36Sopenharmony_ci#define __SYNC_aq	__SYNC_full
6162306a36Sopenharmony_ci#define __SYNC_rl	__SYNC_full
6262306a36Sopenharmony_ci#define __SYNC_mb	__SYNC_full
6362306a36Sopenharmony_ci
6462306a36Sopenharmony_ci/*
6562306a36Sopenharmony_ci * ...except on Cavium Octeon CPUs, which have been using the 'wmb' ordering
6662306a36Sopenharmony_ci * barrier since 2010 & omit 'rmb' barriers because the CPUs don't perform
6762306a36Sopenharmony_ci * speculative reads.
6862306a36Sopenharmony_ci */
6962306a36Sopenharmony_ci#ifdef CONFIG_CPU_CAVIUM_OCTEON
7062306a36Sopenharmony_ci# define __SYNC_rmb	__SYNC_none
7162306a36Sopenharmony_ci# define __SYNC_wmb	0x04
7262306a36Sopenharmony_ci#else
7362306a36Sopenharmony_ci# define __SYNC_rmb	__SYNC_full
7462306a36Sopenharmony_ci# define __SYNC_wmb	__SYNC_full
7562306a36Sopenharmony_ci#endif
7662306a36Sopenharmony_ci
7762306a36Sopenharmony_ci/*
7862306a36Sopenharmony_ci * A GINV sync is a little different; it doesn't relate directly to loads or
7962306a36Sopenharmony_ci * stores, but instead causes synchronization of an icache or TLB global
8062306a36Sopenharmony_ci * invalidation operation triggered by the ginvi or ginvt instructions
8162306a36Sopenharmony_ci * respectively. In cases where we need to know that a ginvi or ginvt operation
8262306a36Sopenharmony_ci * has been performed by all coherent CPUs, we must issue a sync instruction of
8362306a36Sopenharmony_ci * this type. Once this instruction graduates all coherent CPUs will have
8462306a36Sopenharmony_ci * observed the invalidation.
8562306a36Sopenharmony_ci */
8662306a36Sopenharmony_ci#define __SYNC_ginv	0x14
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci/* Trivial; indicate that we always need this sync instruction. */
8962306a36Sopenharmony_ci#define __SYNC_always	(1 << 0)
9062306a36Sopenharmony_ci
9162306a36Sopenharmony_ci/*
9262306a36Sopenharmony_ci * Indicate that we need this sync instruction only on systems with weakly
9362306a36Sopenharmony_ci * ordered memory access. In general this is most MIPS systems, but there are
9462306a36Sopenharmony_ci * exceptions which provide strongly ordered memory.
9562306a36Sopenharmony_ci */
9662306a36Sopenharmony_ci#ifdef CONFIG_WEAK_ORDERING
9762306a36Sopenharmony_ci# define __SYNC_weak_ordering	(1 << 1)
9862306a36Sopenharmony_ci#else
9962306a36Sopenharmony_ci# define __SYNC_weak_ordering	0
10062306a36Sopenharmony_ci#endif
10162306a36Sopenharmony_ci
10262306a36Sopenharmony_ci/*
10362306a36Sopenharmony_ci * Indicate that we need this sync instruction only on systems where LL/SC
10462306a36Sopenharmony_ci * don't implicitly provide a memory barrier. In general this is most MIPS
10562306a36Sopenharmony_ci * systems.
10662306a36Sopenharmony_ci */
10762306a36Sopenharmony_ci#ifdef CONFIG_WEAK_REORDERING_BEYOND_LLSC
10862306a36Sopenharmony_ci# define __SYNC_weak_llsc	(1 << 2)
10962306a36Sopenharmony_ci#else
11062306a36Sopenharmony_ci# define __SYNC_weak_llsc	0
11162306a36Sopenharmony_ci#endif
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci/*
11462306a36Sopenharmony_ci * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
11562306a36Sopenharmony_ci * store or prefetch) in between an LL & SC can cause the SC instruction to
11662306a36Sopenharmony_ci * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
11762306a36Sopenharmony_ci * containing such sequences, this bug bites harder than we might otherwise
11862306a36Sopenharmony_ci * expect due to reordering & speculation:
11962306a36Sopenharmony_ci *
12062306a36Sopenharmony_ci * 1) A memory access appearing prior to the LL in program order may actually
12162306a36Sopenharmony_ci *    be executed after the LL - this is the reordering case.
12262306a36Sopenharmony_ci *
12362306a36Sopenharmony_ci *    In order to avoid this we need to place a memory barrier (ie. a SYNC
12462306a36Sopenharmony_ci *    instruction) prior to every LL instruction, in between it and any earlier
12562306a36Sopenharmony_ci *    memory access instructions.
12662306a36Sopenharmony_ci *
12762306a36Sopenharmony_ci *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
12862306a36Sopenharmony_ci *
12962306a36Sopenharmony_ci * 2) If a conditional branch exists between an LL & SC with a target outside
13062306a36Sopenharmony_ci *    of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
13162306a36Sopenharmony_ci *    or similar, then misprediction of the branch may allow speculative
13262306a36Sopenharmony_ci *    execution of memory accesses from outside of the LL-SC loop.
13362306a36Sopenharmony_ci *
13462306a36Sopenharmony_ci *    In order to avoid this we need a memory barrier (ie. a SYNC instruction)
13562306a36Sopenharmony_ci *    at each affected branch target.
13662306a36Sopenharmony_ci *
13762306a36Sopenharmony_ci *    This case affects all current Loongson 3 CPUs.
13862306a36Sopenharmony_ci *
13962306a36Sopenharmony_ci * The above described cases cause an error in the cache coherence protocol;
14062306a36Sopenharmony_ci * such that the Invalidate of a competing LL-SC goes 'missing' and SC
14162306a36Sopenharmony_ci * erroneously observes its core still has Exclusive state and lets the SC
14262306a36Sopenharmony_ci * proceed.
14362306a36Sopenharmony_ci *
14462306a36Sopenharmony_ci * Therefore the error only occurs on SMP systems.
14562306a36Sopenharmony_ci */
14662306a36Sopenharmony_ci#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS
14762306a36Sopenharmony_ci# define __SYNC_loongson3_war	(1 << 31)
14862306a36Sopenharmony_ci#else
14962306a36Sopenharmony_ci# define __SYNC_loongson3_war	0
15062306a36Sopenharmony_ci#endif
15162306a36Sopenharmony_ci
15262306a36Sopenharmony_ci/*
15362306a36Sopenharmony_ci * Some Cavium Octeon CPUs suffer from a bug that causes a single wmb ordering
15462306a36Sopenharmony_ci * barrier to be ineffective, requiring the use of 2 in sequence to provide an
15562306a36Sopenharmony_ci * effective barrier as noted by commit 6b07d38aaa52 ("MIPS: Octeon: Use
15662306a36Sopenharmony_ci * optimized memory barrier primitives."). Here we specify that the affected
15762306a36Sopenharmony_ci * sync instructions should be emitted twice.
15862306a36Sopenharmony_ci * Note that this expression is evaluated by the assembler (not the compiler),
15962306a36Sopenharmony_ci * which evaluates '==' as 0 or -1 (not 0 or 1): 2 for wmb, 1 otherwise.
16062306a36Sopenharmony_ci */
16162306a36Sopenharmony_ci#ifdef CONFIG_CPU_CAVIUM_OCTEON
16262306a36Sopenharmony_ci# define __SYNC_rpt(type)	(1 - (type == __SYNC_wmb))
16362306a36Sopenharmony_ci#else
16462306a36Sopenharmony_ci# define __SYNC_rpt(type)	1
16562306a36Sopenharmony_ci#endif
16662306a36Sopenharmony_ci
16762306a36Sopenharmony_ci/*
16862306a36Sopenharmony_ci * The main event. Emit a sync instruction of the given type __SYNC_rpt(type)
16962306a36Sopenharmony_ci * times if reason is non-zero & type isn't __SYNC_none (-1), else emit _else.
17062306a36Sopenharmony_ci * Without CONFIG_CPU_HAS_SYNC this expands to nothing, dropping _else too.
17162306a36Sopenharmony_ci *
17262306a36Sopenharmony_ci * In future we could emit entries in a fixups-style table here, letting us
17362306a36Sopenharmony_ci * opportunistically remove sync instructions when we detect at runtime that
17462306a36Sopenharmony_ci * we're running on a CPU that doesn't need them.
17562306a36Sopenharmony_ci */
17662306a36Sopenharmony_ci#ifdef CONFIG_CPU_HAS_SYNC
17762306a36Sopenharmony_ci# define ____SYNC(_type, _reason, _else)			\
17862306a36Sopenharmony_ci	.if	(( _type ) != -1) && ( _reason );		\
17962306a36Sopenharmony_ci	.set	push;						\
18062306a36Sopenharmony_ci	.set	MIPS_ISA_LEVEL_RAW;				\
18162306a36Sopenharmony_ci	.rept	__SYNC_rpt(_type);				\
18262306a36Sopenharmony_ci	sync	_type;						\
18362306a36Sopenharmony_ci	.endr;							\
18462306a36Sopenharmony_ci	.set	pop;						\
18562306a36Sopenharmony_ci	.else;							\
18662306a36Sopenharmony_ci	_else;							\
18762306a36Sopenharmony_ci	.endif
18862306a36Sopenharmony_ci#else
18962306a36Sopenharmony_ci# define ____SYNC(_type, _reason, _else)
19062306a36Sopenharmony_ci#endif
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_ci/*
19362306a36Sopenharmony_ci * Preprocessor magic to expand macros used as arguments before we insert them
19462306a36Sopenharmony_ci * into assembly code. Outside of __ASSEMBLY__ the result is __stringify()'d.
19562306a36Sopenharmony_ci */
19662306a36Sopenharmony_ci#ifdef __ASSEMBLY__
19762306a36Sopenharmony_ci# define ___SYNC(type, reason, else)				\
19862306a36Sopenharmony_ci	____SYNC(type, reason, else)
19962306a36Sopenharmony_ci#else
20062306a36Sopenharmony_ci# define ___SYNC(type, reason, else)				\
20162306a36Sopenharmony_ci	__stringify(____SYNC(type, reason, else))
20262306a36Sopenharmony_ci#endif
20362306a36Sopenharmony_ci
/*
 * Front-end macros. 'type' & 'reason' are given without their __SYNC_ prefix,
 * eg. __SYNC(full, always); the prefix is pasted on here before the arguments
 * are handed to ___SYNC(). __SYNC() passes an empty else argument, whilst
 * __SYNC_ELSE() passes its 'else' argument through unchanged.
 */
20462306a36Sopenharmony_ci#define __SYNC(type, reason)					\
20562306a36Sopenharmony_ci	___SYNC(__SYNC_##type, __SYNC_##reason, )
20662306a36Sopenharmony_ci#define __SYNC_ELSE(type, reason, else)				\
20762306a36Sopenharmony_ci	___SYNC(__SYNC_##type, __SYNC_##reason, else)
20862306a36Sopenharmony_ci
20962306a36Sopenharmony_ci#endif /* __MIPS_ASM_SYNC_H__ */
210