/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __PARISC_LDCW_H
#define __PARISC_LDCW_H

/* Because kmalloc only guarantees 8-byte alignment for kmalloc'd data,
   and GCC only guarantees 8-byte alignment for stack locals, we can't
   be assured of 16-byte alignment for atomic lock data even if we
   specify "__attribute ((aligned(16)))" in the type declaration.  So,
   we use a struct containing an array of four ints for the atomic lock
   type and dynamically select the 16-byte aligned int from the array
   for the semaphore.  */

/* From: "Jim Hull" <jim.hull of hp.com>
   I've attached a summary of the change, but basically, for PA 2.0, as
   long as the ",CO" (coherent operation) completer is implemented, then the
   16-byte alignment requirement for ldcw and ldcd is relaxed, and instead
   they only require "natural" alignment (4-byte for ldcw, 8-byte for
   ldcd).

   Although the cache control hint is accepted by all PA 2.0 processors,
   it is only implemented on PA8800/PA8900 CPUs.  Prior PA8X00 CPUs still
   require 16-byte alignment.  If the address is unaligned, the operation
   of the instruction is undefined.  The ldcw instruction does not generate
   unaligned data reference traps so misaligned accesses are not detected.
   This hid the problem for years.  So, restore the 16-byte alignment dropped
   by Kyle McMartin in "Remove __ldcw_align for PA-RISC 2.0 processors".  */

/* Required alignment of the lock word in bytes, and the same value
   expressed as a power-of-two exponent (1 << 4 == 16).  The two
   definitions must be kept in sync by hand.  */
#define __PA_LDCW_ALIGNMENT	16
#define __PA_LDCW_ALIGN_ORDER	4

/* __ldcw_align(a): "a" is a pointer to an object that has a "lock" array
   member.  Takes the address of (a)->lock[0], rounds it up to the next
   multiple of __PA_LDCW_ALIGNMENT (an already aligned address is left
   unchanged) and yields the result as a "volatile unsigned int *".
   "a" is evaluated exactly once.  This is a GNU statement expression,
   so it can only be used inside a function body.  */
#define __ldcw_align(a) ({					\
	unsigned long __ret = (unsigned long) &(a)->lock[0];	\
	__ret = (__ret + __PA_LDCW_ALIGNMENT - 1)		\
		& ~(__PA_LDCW_ALIGNMENT - 1);			\
	(volatile unsigned int *) __ret;			\
})

/* Instruction mnemonic pasted into the asm template of __ldcw(): the
   ",co" completer form is used when CONFIG_PA20 is defined, the plain
   form otherwise.  */
#ifdef CONFIG_PA20
#define __LDCW	"ldcw,co"
#else
#define __LDCW	"ldcw"
#endif

/* LDCW, the only atomic read-write operation PA-RISC has. *sigh*.
   We don't explicitly expose that "*a" may be written as reload
   fails to find a register in class R1_REGS when "a" needs to be
   reloaded when generating 64-bit PIC code.  Instead, we clobber
   memory to indicate to the compiler that the assembly code reads
   or writes to items other than those listed in the input and output
   operands.  This may pessimize the code somewhat but __ldcw is
   usually used within code blocks surrounded by memory barriers.

   __ldcw(a): "a" is the address operand, handed to the instruction in a
   general register with displacement 0.  The macro evaluates to the
   unsigned value the instruction leaves in its output register.  */
#define __ldcw(a) ({						\
	unsigned __ret;						\
	__asm__ __volatile__(__LDCW " 0(%1),%0"			\
		: "=r" (__ret) : "r" (a) : "memory");		\
	__ret;							\
})

/* Section annotation for lock data; it is only defined when CONFIG_SMP
   is set.  __section() is not defined in this header and is expected to
   come from the including translation unit.  */
#ifdef CONFIG_SMP
# define __lock_aligned __section(".data..lock_aligned")
#endif

#endif /* __PARISC_LDCW_H */