/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __MIPS_ASM_SYNC_H__
#define __MIPS_ASM_SYNC_H__

/*
 * sync types are defined by the MIPS64 Instruction Set documentation in Volume
 * II-A of the MIPS Architecture Reference Manual, which can be found here:
 *
 *   https://www.mips.com/?do-download=the-mips64-instruction-set-v6-06
 *
 * Two types of barrier are provided:
 *
 *   1) Completion barriers, which ensure that a memory operation has actually
 *      completed & often involve stalling the CPU pipeline to do so.
 *
 *   2) Ordering barriers, which only ensure that affected memory operations
 *      won't be reordered in the CPU pipeline in a manner that violates the
 *      restrictions imposed by the barrier.
 *
 * Ordering barriers can be more efficient than completion barriers, since:
 *
 *   a) Ordering barriers only require memory access instructions which precede
 *      them in program order (older instructions) to reach a point in the
 *      load/store datapath beyond which reordering is not possible before
 *      allowing memory access instructions which follow them (younger
 *      instructions) to be performed.  That is, older instructions don't
 *      actually need to complete - they just need to get far enough that all
 *      other coherent CPUs will observe their completion before they observe
 *      the effects of younger instructions.
 *
 *   b) Multiple variants of ordering barrier are provided which allow the
 *      effects to be restricted to different combinations of older or younger
 *      loads or stores. By way of example, if we only care that stores older
 *      than a barrier are observed prior to stores that are younger than a
 *      barrier & don't care about the ordering of loads then the 'wmb'
 *      ordering barrier can be used. Limiting the barrier's effects to stores
 *      allows loads to continue unaffected & potentially allows the CPU to
 *      make progress faster than if younger loads had to wait for older stores
 *      to complete.
 */

/*
 * No sync instruction at all; used to allow code to nullify the effect of the
 * __SYNC() macro without needing lots of #ifdefery.
 *
 * ____SYNC() compares its type argument against -1 to detect this value, so
 * the two must be kept in step.
 */
#define __SYNC_none	-1

/*
 * A full completion barrier; all memory accesses appearing prior to this sync
 * instruction in program order must complete before any memory accesses
 * appearing after this sync instruction in program order.
 */
#define __SYNC_full	0x00

/*
 * For now we use a full completion barrier to implement all sync types, until
 * we're satisfied that lightweight ordering barriers defined by MIPSr6 are
 * sufficient to uphold our desired memory model.
 */
#define __SYNC_aq	__SYNC_full
#define __SYNC_rl	__SYNC_full
#define __SYNC_mb	__SYNC_full

/*
 * ...except on Cavium Octeon CPUs, which have been using the 'wmb' ordering
 * barrier since 2010 & omit 'rmb' barriers because the CPUs don't perform
 * speculative reads.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define __SYNC_rmb	__SYNC_none
# define __SYNC_wmb	0x04
#else
# define __SYNC_rmb	__SYNC_full
# define __SYNC_wmb	__SYNC_full
#endif

/*
 * A GINV sync is a little different; it doesn't relate directly to loads or
 * stores, but instead causes synchronization of an icache or TLB global
 * invalidation operation triggered by the ginvi or ginvt instructions
 * respectively. In cases where we need to know that a ginvi or ginvt operation
 * has been performed by all coherent CPUs, we must issue a sync instruction of
 * this type. Once this instruction graduates all coherent CPUs will have
 * observed the invalidation.
 */
#define __SYNC_ginv	0x14

/*
 * Reasons for emitting a sync. ____SYNC() only tests the reason for being
 * non-zero, so a reason which does not apply to this configuration is simply
 * defined as 0.
 */

/* Trivial; indicate that we always need this sync instruction. */
#define __SYNC_always	(1 << 0)

/*
 * Indicate that we need this sync instruction only on systems with weakly
 * ordered memory access. In general this is most MIPS systems, but there are
 * exceptions which provide strongly ordered memory.
 */
#ifdef CONFIG_WEAK_ORDERING
# define __SYNC_weak_ordering	(1 << 1)
#else
# define __SYNC_weak_ordering	0
#endif

/*
 * Indicate that we need this sync instruction only on systems where LL/SC
 * don't implicitly provide a memory barrier. In general this is most MIPS
 * systems.
 */
#ifdef CONFIG_WEAK_REORDERING_BEYOND_LLSC
# define __SYNC_weak_llsc	(1 << 2)
#else
# define __SYNC_weak_llsc	0
#endif

/*
 * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
 * store or prefetch) in between an LL & SC can cause the SC instruction to
 * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
 * containing such sequences, this bug bites harder than we might otherwise
 * expect due to reordering & speculation:
 *
 * 1) A memory access appearing prior to the LL in program order may actually
 *    be executed after the LL - this is the reordering case.
 *
 *    In order to avoid this we need to place a memory barrier (ie. a SYNC
 *    instruction) prior to every LL instruction, in between it and any earlier
 *    memory access instructions.
 *
 *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
 *
 * 2) If a conditional branch exists between an LL & SC with a target outside
 *    of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
 *    or similar, then misprediction of the branch may allow speculative
 *    execution of memory accesses from outside of the LL-SC loop.
 *
 *    In order to avoid this we need a memory barrier (ie. a SYNC instruction)
 *    at each affected branch target.
 *
 *    This case affects all current Loongson 3 CPUs.
 *
 * The above described cases cause an error in the cache coherence protocol;
 * such that the Invalidate of a competing LL-SC goes 'missing' and SC
 * erroneously observes its core still has Exclusive state and lets the SC
 * proceed.
 *
 * Therefore the error only occurs on SMP systems.
 */
#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS
# define __SYNC_loongson3_war	(1 << 31)
#else
# define __SYNC_loongson3_war	0
#endif

/*
 * Some Cavium Octeon CPUs suffer from a bug that causes a single wmb ordering
 * barrier to be ineffective, requiring the use of 2 in sequence to provide an
 * effective barrier as noted by commit 6b07d38aaa52 ("MIPS: Octeon: Use
 * optimized memory barrier primitives."). Here we specify that the affected
 * sync instructions should be emitted twice.
 * Note that this expression is evaluated by the assembler (not the compiler),
 * and that the assembler evaluates '==' as 0 or -1, not 0 or 1.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define __SYNC_rpt(type)	(1 - (type == __SYNC_wmb))
#else
# define __SYNC_rpt(type)	1
#endif

/*
 * The main event. Here we actually emit a sync instruction of a given type, if
 * reason is non-zero.
 *
 * _type   - one of the __SYNC_<type> values above; -1 (__SYNC_none) emits
 *           nothing.
 * _reason - one of the __SYNC_<reason> values above; 0 emits nothing.
 * _else   - assembly emitted in place of the sync when no sync is emitted
 *           (may be empty).
 *
 * Without CONFIG_CPU_HAS_SYNC the macro expands to nothing at all, _else
 * included.
 *
 * In future we have the option of emitting entries in a fixups-style table
 * here that would allow us to opportunistically remove some sync instructions
 * when we detect at runtime that we're running on a CPU that doesn't need
 * them.
 */
#ifdef CONFIG_CPU_HAS_SYNC
# define ____SYNC(_type, _reason, _else)			\
	.if	(( _type ) != -1) && ( _reason );		\
	.set	push;						\
	.set	MIPS_ISA_LEVEL_RAW;				\
	.rept	__SYNC_rpt(_type);				\
	sync	_type;						\
	.endr;							\
	.set	pop;						\
	.else;							\
	_else;							\
	.endif
#else
# define ____SYNC(_type, _reason, _else)
#endif

/*
 * Preprocessor magic to expand macros used as arguments before we insert them
 * into assembly code. In assembly sources the directives are emitted as-is; in
 * C sources they are turned into a string via __stringify().
 */
#ifdef __ASSEMBLY__
# define ___SYNC(type, reason, else)				\
	____SYNC(type, reason, else)
#else
# define ___SYNC(type, reason, else)				\
	__stringify(____SYNC(type, reason, else))
#endif

/*
 * Public entry points. 'type' and 'reason' are given as bare suffixes (e.g.
 * __SYNC(mb, always)) and are pasted onto the __SYNC_ prefix here.
 */
#define __SYNC(type, reason)					\
	___SYNC(__SYNC_##type, __SYNC_##reason, )
#define __SYNC_ELSE(type, reason, else)				\
	___SYNC(__SYNC_##type, __SYNC_##reason, else)

#endif /* __MIPS_ASM_SYNC_H__ */