18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */ 28c2ecf20Sopenharmony_ci#ifndef __ASM_SH_UNALIGNED_SH4A_H 38c2ecf20Sopenharmony_ci#define __ASM_SH_UNALIGNED_SH4A_H 48c2ecf20Sopenharmony_ci 58c2ecf20Sopenharmony_ci/* 68c2ecf20Sopenharmony_ci * SH-4A has support for unaligned 32-bit loads, and 32-bit loads only. 78c2ecf20Sopenharmony_ci * Support for 64-bit accesses are done through shifting and masking 88c2ecf20Sopenharmony_ci * relative to the endianness. Unaligned stores are not supported by the 98c2ecf20Sopenharmony_ci * instruction encoding, so these continue to use the packed 108c2ecf20Sopenharmony_ci * struct. 118c2ecf20Sopenharmony_ci * 128c2ecf20Sopenharmony_ci * The same note as with the movli.l/movco.l pair applies here, as long 138c2ecf20Sopenharmony_ci * as the load is guaranteed to be inlined, nothing else will hook in to 148c2ecf20Sopenharmony_ci * r0 and we get the return value for free. 158c2ecf20Sopenharmony_ci * 168c2ecf20Sopenharmony_ci * NOTE: Due to the fact we require r0 encoding, care should be taken to 178c2ecf20Sopenharmony_ci * avoid mixing these heavily with other r0 consumers, such as the atomic 188c2ecf20Sopenharmony_ci * ops. Failure to adhere to this can result in the compiler running out 198c2ecf20Sopenharmony_ci * of spill registers and blowing up when building at low optimization 208c2ecf20Sopenharmony_ci * levels. See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34777. 218c2ecf20Sopenharmony_ci */ 228c2ecf20Sopenharmony_ci#include <linux/unaligned/packed_struct.h> 238c2ecf20Sopenharmony_ci#include <linux/types.h> 248c2ecf20Sopenharmony_ci#include <asm/byteorder.h> 258c2ecf20Sopenharmony_ci 268c2ecf20Sopenharmony_cistatic inline u16 sh4a_get_unaligned_cpu16(const u8 *p) 278c2ecf20Sopenharmony_ci{ 288c2ecf20Sopenharmony_ci#ifdef __LITTLE_ENDIAN 298c2ecf20Sopenharmony_ci return p[0] | p[1] << 8; 308c2ecf20Sopenharmony_ci#else 318c2ecf20Sopenharmony_ci return p[0] << 8 | p[1]; 328c2ecf20Sopenharmony_ci#endif 338c2ecf20Sopenharmony_ci} 348c2ecf20Sopenharmony_ci 358c2ecf20Sopenharmony_cistatic __always_inline u32 sh4a_get_unaligned_cpu32(const u8 *p) 368c2ecf20Sopenharmony_ci{ 378c2ecf20Sopenharmony_ci unsigned long unaligned; 388c2ecf20Sopenharmony_ci 398c2ecf20Sopenharmony_ci __asm__ __volatile__ ( 408c2ecf20Sopenharmony_ci "movua.l @%1, %0\n\t" 418c2ecf20Sopenharmony_ci : "=z" (unaligned) 428c2ecf20Sopenharmony_ci : "r" (p) 438c2ecf20Sopenharmony_ci ); 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_ci return unaligned; 468c2ecf20Sopenharmony_ci} 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_ci/* 498c2ecf20Sopenharmony_ci * Even though movua.l supports auto-increment on the read side, it can 508c2ecf20Sopenharmony_ci * only store to r0 due to instruction encoding constraints, so just let 518c2ecf20Sopenharmony_ci * the compiler sort it out on its own. 528c2ecf20Sopenharmony_ci */ 538c2ecf20Sopenharmony_cistatic inline u64 sh4a_get_unaligned_cpu64(const u8 *p) 548c2ecf20Sopenharmony_ci{ 558c2ecf20Sopenharmony_ci#ifdef __LITTLE_ENDIAN 568c2ecf20Sopenharmony_ci return (u64)sh4a_get_unaligned_cpu32(p + 4) << 32 | 578c2ecf20Sopenharmony_ci sh4a_get_unaligned_cpu32(p); 588c2ecf20Sopenharmony_ci#else 598c2ecf20Sopenharmony_ci return (u64)sh4a_get_unaligned_cpu32(p) << 32 | 608c2ecf20Sopenharmony_ci sh4a_get_unaligned_cpu32(p + 4); 618c2ecf20Sopenharmony_ci#endif 628c2ecf20Sopenharmony_ci} 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_cistatic inline u16 get_unaligned_le16(const void *p) 658c2ecf20Sopenharmony_ci{ 668c2ecf20Sopenharmony_ci return le16_to_cpu(sh4a_get_unaligned_cpu16(p)); 678c2ecf20Sopenharmony_ci} 688c2ecf20Sopenharmony_ci 698c2ecf20Sopenharmony_cistatic inline u32 get_unaligned_le32(const void *p) 708c2ecf20Sopenharmony_ci{ 718c2ecf20Sopenharmony_ci return le32_to_cpu(sh4a_get_unaligned_cpu32(p)); 728c2ecf20Sopenharmony_ci} 738c2ecf20Sopenharmony_ci 748c2ecf20Sopenharmony_cistatic inline u64 get_unaligned_le64(const void *p) 758c2ecf20Sopenharmony_ci{ 768c2ecf20Sopenharmony_ci return le64_to_cpu(sh4a_get_unaligned_cpu64(p)); 778c2ecf20Sopenharmony_ci} 788c2ecf20Sopenharmony_ci 798c2ecf20Sopenharmony_cistatic inline u16 get_unaligned_be16(const void *p) 808c2ecf20Sopenharmony_ci{ 818c2ecf20Sopenharmony_ci return be16_to_cpu(sh4a_get_unaligned_cpu16(p)); 828c2ecf20Sopenharmony_ci} 838c2ecf20Sopenharmony_ci 848c2ecf20Sopenharmony_cistatic inline u32 get_unaligned_be32(const void *p) 858c2ecf20Sopenharmony_ci{ 868c2ecf20Sopenharmony_ci return be32_to_cpu(sh4a_get_unaligned_cpu32(p)); 878c2ecf20Sopenharmony_ci} 888c2ecf20Sopenharmony_ci 898c2ecf20Sopenharmony_cistatic inline u64 get_unaligned_be64(const void *p) 908c2ecf20Sopenharmony_ci{ 918c2ecf20Sopenharmony_ci return be64_to_cpu(sh4a_get_unaligned_cpu64(p)); 928c2ecf20Sopenharmony_ci} 938c2ecf20Sopenharmony_ci 948c2ecf20Sopenharmony_cistatic inline void nonnative_put_le16(u16 val, u8 *p) 958c2ecf20Sopenharmony_ci{ 968c2ecf20Sopenharmony_ci *p++ = val; 978c2ecf20Sopenharmony_ci *p++ = val >> 8; 988c2ecf20Sopenharmony_ci} 998c2ecf20Sopenharmony_ci 1008c2ecf20Sopenharmony_cistatic inline void nonnative_put_le32(u32 val, u8 *p) 1018c2ecf20Sopenharmony_ci{ 1028c2ecf20Sopenharmony_ci nonnative_put_le16(val, p); 1038c2ecf20Sopenharmony_ci nonnative_put_le16(val >> 16, p + 2); 1048c2ecf20Sopenharmony_ci} 1058c2ecf20Sopenharmony_ci 1068c2ecf20Sopenharmony_cistatic inline void nonnative_put_le64(u64 val, u8 *p) 1078c2ecf20Sopenharmony_ci{ 1088c2ecf20Sopenharmony_ci nonnative_put_le32(val, p); 1098c2ecf20Sopenharmony_ci nonnative_put_le32(val >> 32, p + 4); 1108c2ecf20Sopenharmony_ci} 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_cistatic inline void nonnative_put_be16(u16 val, u8 *p) 1138c2ecf20Sopenharmony_ci{ 1148c2ecf20Sopenharmony_ci *p++ = val >> 8; 1158c2ecf20Sopenharmony_ci *p++ = val; 1168c2ecf20Sopenharmony_ci} 1178c2ecf20Sopenharmony_ci 1188c2ecf20Sopenharmony_cistatic inline void nonnative_put_be32(u32 val, u8 *p) 1198c2ecf20Sopenharmony_ci{ 1208c2ecf20Sopenharmony_ci nonnative_put_be16(val >> 16, p); 1218c2ecf20Sopenharmony_ci nonnative_put_be16(val, p + 2); 1228c2ecf20Sopenharmony_ci} 1238c2ecf20Sopenharmony_ci 1248c2ecf20Sopenharmony_cistatic inline void nonnative_put_be64(u64 val, u8 *p) 1258c2ecf20Sopenharmony_ci{ 1268c2ecf20Sopenharmony_ci nonnative_put_be32(val >> 32, p); 1278c2ecf20Sopenharmony_ci nonnative_put_be32(val, p + 4); 1288c2ecf20Sopenharmony_ci} 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_cistatic inline void put_unaligned_le16(u16 val, void *p) 1318c2ecf20Sopenharmony_ci{ 1328c2ecf20Sopenharmony_ci#ifdef __LITTLE_ENDIAN 1338c2ecf20Sopenharmony_ci __put_unaligned_cpu16(val, p); 1348c2ecf20Sopenharmony_ci#else 1358c2ecf20Sopenharmony_ci nonnative_put_le16(val, p); 1368c2ecf20Sopenharmony_ci#endif 1378c2ecf20Sopenharmony_ci} 1388c2ecf20Sopenharmony_ci 1398c2ecf20Sopenharmony_cistatic inline void put_unaligned_le32(u32 val, void *p) 1408c2ecf20Sopenharmony_ci{ 1418c2ecf20Sopenharmony_ci#ifdef __LITTLE_ENDIAN 1428c2ecf20Sopenharmony_ci __put_unaligned_cpu32(val, p); 1438c2ecf20Sopenharmony_ci#else 1448c2ecf20Sopenharmony_ci nonnative_put_le32(val, p); 1458c2ecf20Sopenharmony_ci#endif 1468c2ecf20Sopenharmony_ci} 1478c2ecf20Sopenharmony_ci 1488c2ecf20Sopenharmony_cistatic inline void put_unaligned_le64(u64 val, void *p) 1498c2ecf20Sopenharmony_ci{ 1508c2ecf20Sopenharmony_ci#ifdef __LITTLE_ENDIAN 1518c2ecf20Sopenharmony_ci __put_unaligned_cpu64(val, p); 1528c2ecf20Sopenharmony_ci#else 1538c2ecf20Sopenharmony_ci nonnative_put_le64(val, p); 1548c2ecf20Sopenharmony_ci#endif 1558c2ecf20Sopenharmony_ci} 1568c2ecf20Sopenharmony_ci 1578c2ecf20Sopenharmony_cistatic inline void put_unaligned_be16(u16 val, void *p) 1588c2ecf20Sopenharmony_ci{ 1598c2ecf20Sopenharmony_ci#ifdef __BIG_ENDIAN 1608c2ecf20Sopenharmony_ci __put_unaligned_cpu16(val, p); 1618c2ecf20Sopenharmony_ci#else 1628c2ecf20Sopenharmony_ci nonnative_put_be16(val, p); 1638c2ecf20Sopenharmony_ci#endif 1648c2ecf20Sopenharmony_ci} 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_cistatic inline void put_unaligned_be32(u32 val, void *p) 1678c2ecf20Sopenharmony_ci{ 1688c2ecf20Sopenharmony_ci#ifdef __BIG_ENDIAN 1698c2ecf20Sopenharmony_ci __put_unaligned_cpu32(val, p); 1708c2ecf20Sopenharmony_ci#else 1718c2ecf20Sopenharmony_ci nonnative_put_be32(val, p); 1728c2ecf20Sopenharmony_ci#endif 1738c2ecf20Sopenharmony_ci} 1748c2ecf20Sopenharmony_ci 1758c2ecf20Sopenharmony_cistatic inline void put_unaligned_be64(u64 val, void *p) 1768c2ecf20Sopenharmony_ci{ 1778c2ecf20Sopenharmony_ci#ifdef __BIG_ENDIAN 1788c2ecf20Sopenharmony_ci __put_unaligned_cpu64(val, p); 1798c2ecf20Sopenharmony_ci#else 1808c2ecf20Sopenharmony_ci nonnative_put_be64(val, p); 1818c2ecf20Sopenharmony_ci#endif 1828c2ecf20Sopenharmony_ci} 1838c2ecf20Sopenharmony_ci 1848c2ecf20Sopenharmony_ci/* 1858c2ecf20Sopenharmony_ci * While it's a bit non-obvious, even though the generic le/be wrappers 1868c2ecf20Sopenharmony_ci * use the __get/put_xxx prefixing, they actually wrap in to the 1878c2ecf20Sopenharmony_ci * non-prefixed get/put_xxx variants as provided above. 1888c2ecf20Sopenharmony_ci */ 1898c2ecf20Sopenharmony_ci#include <linux/unaligned/generic.h> 1908c2ecf20Sopenharmony_ci 1918c2ecf20Sopenharmony_ci#ifdef __LITTLE_ENDIAN 1928c2ecf20Sopenharmony_ci# define get_unaligned __get_unaligned_le 1938c2ecf20Sopenharmony_ci# define put_unaligned __put_unaligned_le 1948c2ecf20Sopenharmony_ci#else 1958c2ecf20Sopenharmony_ci# define get_unaligned __get_unaligned_be 1968c2ecf20Sopenharmony_ci# define put_unaligned __put_unaligned_be 1978c2ecf20Sopenharmony_ci#endif 1988c2ecf20Sopenharmony_ci 1998c2ecf20Sopenharmony_ci#endif /* __ASM_SH_UNALIGNED_SH4A_H */ 200