18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */
28c2ecf20Sopenharmony_ci#ifndef __ASM_SH_UNALIGNED_SH4A_H
38c2ecf20Sopenharmony_ci#define __ASM_SH_UNALIGNED_SH4A_H
48c2ecf20Sopenharmony_ci
/*
 * SH-4A has support for unaligned 32-bit loads, and 32-bit loads only.
 * Support for 64-bit accesses is done through shifting and masking
 * relative to the endianness. Unaligned stores are not supported by the
 * instruction encoding, so these continue to use the packed
 * struct.
 *
 * The same note as with the movli.l/movco.l pair applies here: as long
 * as the load is guaranteed to be inlined, nothing else will hook into
 * r0 and we get the return value for free.
 *
 * NOTE: Due to the fact we require r0 encoding, care should be taken to
 * avoid mixing these heavily with other r0 consumers, such as the atomic
 * ops. Failure to adhere to this can result in the compiler running out
 * of spill registers and blowing up when building at low optimization
 * levels. See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34777.
 */
228c2ecf20Sopenharmony_ci#include <linux/unaligned/packed_struct.h>
238c2ecf20Sopenharmony_ci#include <linux/types.h>
248c2ecf20Sopenharmony_ci#include <asm/byteorder.h>
258c2ecf20Sopenharmony_ci
268c2ecf20Sopenharmony_cistatic inline u16 sh4a_get_unaligned_cpu16(const u8 *p)
278c2ecf20Sopenharmony_ci{
288c2ecf20Sopenharmony_ci#ifdef __LITTLE_ENDIAN
298c2ecf20Sopenharmony_ci	return p[0] | p[1] << 8;
308c2ecf20Sopenharmony_ci#else
318c2ecf20Sopenharmony_ci	return p[0] << 8 | p[1];
328c2ecf20Sopenharmony_ci#endif
338c2ecf20Sopenharmony_ci}
348c2ecf20Sopenharmony_ci
358c2ecf20Sopenharmony_cistatic __always_inline u32 sh4a_get_unaligned_cpu32(const u8 *p)
368c2ecf20Sopenharmony_ci{
378c2ecf20Sopenharmony_ci	unsigned long unaligned;
388c2ecf20Sopenharmony_ci
398c2ecf20Sopenharmony_ci	__asm__ __volatile__ (
408c2ecf20Sopenharmony_ci		"movua.l	@%1, %0\n\t"
418c2ecf20Sopenharmony_ci		 : "=z" (unaligned)
428c2ecf20Sopenharmony_ci		 : "r" (p)
438c2ecf20Sopenharmony_ci	);
448c2ecf20Sopenharmony_ci
458c2ecf20Sopenharmony_ci	return unaligned;
468c2ecf20Sopenharmony_ci}
478c2ecf20Sopenharmony_ci
488c2ecf20Sopenharmony_ci/*
498c2ecf20Sopenharmony_ci * Even though movua.l supports auto-increment on the read side, it can
508c2ecf20Sopenharmony_ci * only store to r0 due to instruction encoding constraints, so just let
518c2ecf20Sopenharmony_ci * the compiler sort it out on its own.
528c2ecf20Sopenharmony_ci */
538c2ecf20Sopenharmony_cistatic inline u64 sh4a_get_unaligned_cpu64(const u8 *p)
548c2ecf20Sopenharmony_ci{
558c2ecf20Sopenharmony_ci#ifdef __LITTLE_ENDIAN
568c2ecf20Sopenharmony_ci	return (u64)sh4a_get_unaligned_cpu32(p + 4) << 32 |
578c2ecf20Sopenharmony_ci		    sh4a_get_unaligned_cpu32(p);
588c2ecf20Sopenharmony_ci#else
598c2ecf20Sopenharmony_ci	return (u64)sh4a_get_unaligned_cpu32(p) << 32 |
608c2ecf20Sopenharmony_ci		    sh4a_get_unaligned_cpu32(p + 4);
618c2ecf20Sopenharmony_ci#endif
628c2ecf20Sopenharmony_ci}
638c2ecf20Sopenharmony_ci
648c2ecf20Sopenharmony_cistatic inline u16 get_unaligned_le16(const void *p)
658c2ecf20Sopenharmony_ci{
668c2ecf20Sopenharmony_ci	return le16_to_cpu(sh4a_get_unaligned_cpu16(p));
678c2ecf20Sopenharmony_ci}
688c2ecf20Sopenharmony_ci
698c2ecf20Sopenharmony_cistatic inline u32 get_unaligned_le32(const void *p)
708c2ecf20Sopenharmony_ci{
718c2ecf20Sopenharmony_ci	return le32_to_cpu(sh4a_get_unaligned_cpu32(p));
728c2ecf20Sopenharmony_ci}
738c2ecf20Sopenharmony_ci
748c2ecf20Sopenharmony_cistatic inline u64 get_unaligned_le64(const void *p)
758c2ecf20Sopenharmony_ci{
768c2ecf20Sopenharmony_ci	return le64_to_cpu(sh4a_get_unaligned_cpu64(p));
778c2ecf20Sopenharmony_ci}
788c2ecf20Sopenharmony_ci
798c2ecf20Sopenharmony_cistatic inline u16 get_unaligned_be16(const void *p)
808c2ecf20Sopenharmony_ci{
818c2ecf20Sopenharmony_ci	return be16_to_cpu(sh4a_get_unaligned_cpu16(p));
828c2ecf20Sopenharmony_ci}
838c2ecf20Sopenharmony_ci
848c2ecf20Sopenharmony_cistatic inline u32 get_unaligned_be32(const void *p)
858c2ecf20Sopenharmony_ci{
868c2ecf20Sopenharmony_ci	return be32_to_cpu(sh4a_get_unaligned_cpu32(p));
878c2ecf20Sopenharmony_ci}
888c2ecf20Sopenharmony_ci
898c2ecf20Sopenharmony_cistatic inline u64 get_unaligned_be64(const void *p)
908c2ecf20Sopenharmony_ci{
918c2ecf20Sopenharmony_ci	return be64_to_cpu(sh4a_get_unaligned_cpu64(p));
928c2ecf20Sopenharmony_ci}
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_cistatic inline void nonnative_put_le16(u16 val, u8 *p)
958c2ecf20Sopenharmony_ci{
968c2ecf20Sopenharmony_ci	*p++ = val;
978c2ecf20Sopenharmony_ci	*p++ = val >> 8;
988c2ecf20Sopenharmony_ci}
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_cistatic inline void nonnative_put_le32(u32 val, u8 *p)
1018c2ecf20Sopenharmony_ci{
1028c2ecf20Sopenharmony_ci	nonnative_put_le16(val, p);
1038c2ecf20Sopenharmony_ci	nonnative_put_le16(val >> 16, p + 2);
1048c2ecf20Sopenharmony_ci}
1058c2ecf20Sopenharmony_ci
1068c2ecf20Sopenharmony_cistatic inline void nonnative_put_le64(u64 val, u8 *p)
1078c2ecf20Sopenharmony_ci{
1088c2ecf20Sopenharmony_ci	nonnative_put_le32(val, p);
1098c2ecf20Sopenharmony_ci	nonnative_put_le32(val >> 32, p + 4);
1108c2ecf20Sopenharmony_ci}
1118c2ecf20Sopenharmony_ci
1128c2ecf20Sopenharmony_cistatic inline void nonnative_put_be16(u16 val, u8 *p)
1138c2ecf20Sopenharmony_ci{
1148c2ecf20Sopenharmony_ci	*p++ = val >> 8;
1158c2ecf20Sopenharmony_ci	*p++ = val;
1168c2ecf20Sopenharmony_ci}
1178c2ecf20Sopenharmony_ci
1188c2ecf20Sopenharmony_cistatic inline void nonnative_put_be32(u32 val, u8 *p)
1198c2ecf20Sopenharmony_ci{
1208c2ecf20Sopenharmony_ci	nonnative_put_be16(val >> 16, p);
1218c2ecf20Sopenharmony_ci	nonnative_put_be16(val, p + 2);
1228c2ecf20Sopenharmony_ci}
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_cistatic inline void nonnative_put_be64(u64 val, u8 *p)
1258c2ecf20Sopenharmony_ci{
1268c2ecf20Sopenharmony_ci	nonnative_put_be32(val >> 32, p);
1278c2ecf20Sopenharmony_ci	nonnative_put_be32(val, p + 4);
1288c2ecf20Sopenharmony_ci}
1298c2ecf20Sopenharmony_ci
1308c2ecf20Sopenharmony_cistatic inline void put_unaligned_le16(u16 val, void *p)
1318c2ecf20Sopenharmony_ci{
1328c2ecf20Sopenharmony_ci#ifdef __LITTLE_ENDIAN
1338c2ecf20Sopenharmony_ci	__put_unaligned_cpu16(val, p);
1348c2ecf20Sopenharmony_ci#else
1358c2ecf20Sopenharmony_ci	nonnative_put_le16(val, p);
1368c2ecf20Sopenharmony_ci#endif
1378c2ecf20Sopenharmony_ci}
1388c2ecf20Sopenharmony_ci
1398c2ecf20Sopenharmony_cistatic inline void put_unaligned_le32(u32 val, void *p)
1408c2ecf20Sopenharmony_ci{
1418c2ecf20Sopenharmony_ci#ifdef __LITTLE_ENDIAN
1428c2ecf20Sopenharmony_ci	__put_unaligned_cpu32(val, p);
1438c2ecf20Sopenharmony_ci#else
1448c2ecf20Sopenharmony_ci	nonnative_put_le32(val, p);
1458c2ecf20Sopenharmony_ci#endif
1468c2ecf20Sopenharmony_ci}
1478c2ecf20Sopenharmony_ci
1488c2ecf20Sopenharmony_cistatic inline void put_unaligned_le64(u64 val, void *p)
1498c2ecf20Sopenharmony_ci{
1508c2ecf20Sopenharmony_ci#ifdef __LITTLE_ENDIAN
1518c2ecf20Sopenharmony_ci	__put_unaligned_cpu64(val, p);
1528c2ecf20Sopenharmony_ci#else
1538c2ecf20Sopenharmony_ci	nonnative_put_le64(val, p);
1548c2ecf20Sopenharmony_ci#endif
1558c2ecf20Sopenharmony_ci}
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_cistatic inline void put_unaligned_be16(u16 val, void *p)
1588c2ecf20Sopenharmony_ci{
1598c2ecf20Sopenharmony_ci#ifdef __BIG_ENDIAN
1608c2ecf20Sopenharmony_ci	__put_unaligned_cpu16(val, p);
1618c2ecf20Sopenharmony_ci#else
1628c2ecf20Sopenharmony_ci	nonnative_put_be16(val, p);
1638c2ecf20Sopenharmony_ci#endif
1648c2ecf20Sopenharmony_ci}
1658c2ecf20Sopenharmony_ci
1668c2ecf20Sopenharmony_cistatic inline void put_unaligned_be32(u32 val, void *p)
1678c2ecf20Sopenharmony_ci{
1688c2ecf20Sopenharmony_ci#ifdef __BIG_ENDIAN
1698c2ecf20Sopenharmony_ci	__put_unaligned_cpu32(val, p);
1708c2ecf20Sopenharmony_ci#else
1718c2ecf20Sopenharmony_ci	nonnative_put_be32(val, p);
1728c2ecf20Sopenharmony_ci#endif
1738c2ecf20Sopenharmony_ci}
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_cistatic inline void put_unaligned_be64(u64 val, void *p)
1768c2ecf20Sopenharmony_ci{
1778c2ecf20Sopenharmony_ci#ifdef __BIG_ENDIAN
1788c2ecf20Sopenharmony_ci	__put_unaligned_cpu64(val, p);
1798c2ecf20Sopenharmony_ci#else
1808c2ecf20Sopenharmony_ci	nonnative_put_be64(val, p);
1818c2ecf20Sopenharmony_ci#endif
1828c2ecf20Sopenharmony_ci}
1838c2ecf20Sopenharmony_ci
/*
 * While it's a bit non-obvious, even though the generic le/be wrappers
 * use the __get/put_xxx prefixing, they actually wrap into the
 * non-prefixed get/put_xxx variants as provided above.
 */
1898c2ecf20Sopenharmony_ci#include <linux/unaligned/generic.h>
1908c2ecf20Sopenharmony_ci
/*
 * Native-endian accessors: map get_unaligned/put_unaligned onto the
 * __get/__put_unaligned_le names when __LITTLE_ENDIAN is defined, and
 * onto the _be names otherwise.
 */
#if defined(__LITTLE_ENDIAN)
# define get_unaligned	__get_unaligned_le
# define put_unaligned	__put_unaligned_le
#else
# define get_unaligned	__get_unaligned_be
# define put_unaligned	__put_unaligned_be
#endif
1988c2ecf20Sopenharmony_ci
1998c2ecf20Sopenharmony_ci#endif /* __ASM_SH_UNALIGNED_SH4A_H */
200