18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */
28c2ecf20Sopenharmony_ci#ifndef __PARISC_LDCW_H
38c2ecf20Sopenharmony_ci#define __PARISC_LDCW_H
48c2ecf20Sopenharmony_ci
/*
 * Alignment of the lock word used by ldcw.
 *
 * kmalloc only guarantees 8-byte alignment for the data it hands out and
 * GCC only guarantees 8-byte alignment for stack locals, so specifying
 * "__attribute ((aligned(16)))" in the type declaration does not assure
 * 16-byte alignment of atomic lock data.  The atomic lock type is
 * therefore a struct containing an array of four ints, and the 16-byte
 * aligned int inside that array is selected dynamically to act as the
 * semaphore.
 *
 * From: "Jim Hull" <jim.hull of hp.com>
 *   I've attached a summary of the change, but basically, for PA 2.0, as
 *   long as the ",CO" (coherent operation) completer is implemented, then
 *   the 16-byte alignment requirement for ldcw and ldcd is relaxed, and
 *   instead they only require "natural" alignment (4-byte for ldcw,
 *   8-byte for ldcd).
 *
 * The cache control hint is accepted by all PA 2.0 processors, but it is
 * only implemented on PA8800/PA8900 CPUs; prior PA8X00 CPUs still require
 * 16-byte alignment.  If the address is unaligned, the operation of the
 * instruction is undefined.  Because ldcw does not generate unaligned
 * data reference traps, misaligned accesses go undetected, which hid the
 * problem for years.  Hence the 16-byte alignment dropped by Kyle
 * McMartin in "Remove __ldcw_align for PA-RISC 2.0 processors" is
 * restored here.
 *
 * __ldcw_align(a): "a" points to an object that has a "lock" array
 * member.  The macro evaluates "a" once and yields the address of
 * lock[0] rounded up to the next multiple of __PA_LDCW_ALIGNMENT, typed
 * as "volatile unsigned int *".
 */

#define __PA_LDCW_ALIGNMENT	16
#define __PA_LDCW_ALIGN_ORDER	4
#define __ldcw_align(a) ({						\
	unsigned long __lock_addr = (unsigned long) &(a)->lock[0];	\
	const unsigned long __low_bits = __PA_LDCW_ALIGNMENT - 1;	\
	__lock_addr += __low_bits;					\
	__lock_addr &= ~__low_bits;					\
	(volatile unsigned int *) __lock_addr;				\
})
368c2ecf20Sopenharmony_ci
/*
 * Instruction mnemonic pasted into the __ldcw() asm template.  Builds
 * with CONFIG_PA20 append the ",co" completer; every other build uses
 * the plain instruction.
 */
#ifndef CONFIG_PA20
#define __LDCW	"ldcw"
#else
#define __LDCW	"ldcw,co"
#endif
428c2ecf20Sopenharmony_ci
/*
 * LDCW is the only atomic read-write operation PA-RISC has. *sigh*
 *
 * The asm deliberately does not list "*a" as an operand that may be
 * written: when "a" needs to be reloaded while generating 64-bit PIC
 * code, reload fails to find a register in class R1_REGS.  A "memory"
 * clobber is used instead, telling the compiler that the assembly reads
 * or writes items other than those named in the input and output
 * operands.  This may pessimize the code somewhat, but __ldcw is usually
 * used within code blocks surrounded by memory barriers.
 *
 * __ldcw(a): "a" is the address handed to the instruction in a general
 * register.  The macro evaluates to the unsigned value the instruction
 * places in its output register.
 */
#define __ldcw(a) ({							\
	unsigned int __ldcw_val;					\
	__asm__ __volatile__(						\
		__LDCW " 0(%1),%0"					\
		: "=r" (__ldcw_val)					\
		: "r" (a)						\
		: "memory");						\
	__ldcw_val;							\
})
578c2ecf20Sopenharmony_ci
/*
 * On SMP builds, a variable tagged with __lock_aligned is placed in the
 * .data..lock_aligned section.  The tag also requests
 * __PA_LDCW_ALIGNMENT (16-byte) alignment for the variable itself:
 * choosing a section alone does not align anything, and ldcw needs a
 * 16-byte aligned address on CPUs that do not implement the ",co" hint.
 * The macro is intentionally left undefined when CONFIG_SMP is not set.
 */
#ifdef CONFIG_SMP
# define __lock_aligned	__section(".data..lock_aligned") \
			__attribute__((__aligned__(__PA_LDCW_ALIGNMENT)))
#endif
618c2ecf20Sopenharmony_ci
628c2ecf20Sopenharmony_ci#endif /* __PARISC_LDCW_H */
63