18c2ecf20Sopenharmony_ci#ifndef _ASM_WORD_AT_A_TIME_H
28c2ecf20Sopenharmony_ci#define _ASM_WORD_AT_A_TIME_H
38c2ecf20Sopenharmony_ci
/*
 * Word-at-a-time interfaces for PowerPC.
 */
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci#include <linux/kernel.h>
98c2ecf20Sopenharmony_ci#include <asm/asm-compat.h>
108c2ecf20Sopenharmony_ci#include <asm/ppc_asm.h>
118c2ecf20Sopenharmony_ci
128c2ecf20Sopenharmony_ci#ifdef __BIG_ENDIAN__
138c2ecf20Sopenharmony_ci
/*
 * Constants consumed by the big-endian has_zero() and prep_zero_mask().
 * The member order (high_bits first, then low_bits) must match the
 * positional initializer WORD_AT_A_TIME_CONSTANTS.
 */
struct word_at_a_time {
	const unsigned long high_bits, low_bits;
};
178c2ecf20Sopenharmony_ci
/*
 * Positional initializer for the big-endian struct word_at_a_time:
 * the first value initializes high_bits, the second low_bits.
 * REPEAT_BYTE() is defined outside this file; assuming it replicates its
 * argument into every byte of an unsigned long, high_bits is 0xfe...feff
 * (i.e. minus 0x01...01) and low_bits is 0x7f...7f.
 */
#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0xfe) + 1, REPEAT_BYTE(0x7f) }
198c2ecf20Sopenharmony_ci
208c2ecf20Sopenharmony_ci/* Bit set in the bytes that have a zero */
218c2ecf20Sopenharmony_cistatic inline long prep_zero_mask(unsigned long val, unsigned long rhs, const struct word_at_a_time *c)
228c2ecf20Sopenharmony_ci{
238c2ecf20Sopenharmony_ci	unsigned long mask = (val & c->low_bits) + c->low_bits;
248c2ecf20Sopenharmony_ci	return ~(mask | rhs);
258c2ecf20Sopenharmony_ci}
268c2ecf20Sopenharmony_ci
/*
 * Big-endian: no transformation is applied, the argument is handed back
 * unchanged (find_zero() and zero_bytemask() work on it directly).
 */
#define create_zero_mask(mask) (mask)
288c2ecf20Sopenharmony_ci
298c2ecf20Sopenharmony_cistatic inline long find_zero(unsigned long mask)
308c2ecf20Sopenharmony_ci{
318c2ecf20Sopenharmony_ci	long leading_zero_bits;
328c2ecf20Sopenharmony_ci
338c2ecf20Sopenharmony_ci	asm (PPC_CNTLZL "%0,%1" : "=r" (leading_zero_bits) : "r" (mask));
348c2ecf20Sopenharmony_ci	return leading_zero_bits >> 3;
358c2ecf20Sopenharmony_ci}
368c2ecf20Sopenharmony_ci
378c2ecf20Sopenharmony_cistatic inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
388c2ecf20Sopenharmony_ci{
398c2ecf20Sopenharmony_ci	unsigned long rhs = val | c->low_bits;
408c2ecf20Sopenharmony_ci	*data = rhs;
418c2ecf20Sopenharmony_ci	return (val + c->high_bits) & ~rhs;
428c2ecf20Sopenharmony_ci}
438c2ecf20Sopenharmony_ci
/*
 * Build a mask selecting the bits above the highest set bit of @mask.
 *
 * __fls() is defined outside this file; presumably it returns the index
 * of the most significant set bit.  Under that assumption the result has
 * every bit strictly above that position set and all others clear.
 */
static inline unsigned long zero_bytemask(unsigned long mask)
{
	return ~1ul << __fls(mask);
}
488c2ecf20Sopenharmony_ci
498c2ecf20Sopenharmony_ci#else
508c2ecf20Sopenharmony_ci
518c2ecf20Sopenharmony_ci#ifdef CONFIG_64BIT
528c2ecf20Sopenharmony_ci
/*
 * Unused on 64-bit little-endian: the helpers in this branch take a
 * struct word_at_a_time pointer but never dereference it, so the
 * structure has no members.
 */
struct word_at_a_time {
};
568c2ecf20Sopenharmony_ci
/* Empty initializer matching the member-less struct word_at_a_time. */
#define WORD_AT_A_TIME_CONSTANTS { }
588c2ecf20Sopenharmony_ci
598c2ecf20Sopenharmony_ci/* This will give us 0xff for a NULL char and 0x00 elsewhere */
608c2ecf20Sopenharmony_cistatic inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
618c2ecf20Sopenharmony_ci{
628c2ecf20Sopenharmony_ci	unsigned long ret;
638c2ecf20Sopenharmony_ci	unsigned long zero = 0;
648c2ecf20Sopenharmony_ci
658c2ecf20Sopenharmony_ci	asm("cmpb %0,%1,%2" : "=r" (ret) : "r" (a), "r" (zero));
668c2ecf20Sopenharmony_ci	*bits = ret;
678c2ecf20Sopenharmony_ci
688c2ecf20Sopenharmony_ci	return ret;
698c2ecf20Sopenharmony_ci}
708c2ecf20Sopenharmony_ci
/*
 * Nothing to prepare on 64-bit little-endian: the mask produced by
 * has_zero() is returned unchanged.  @a and @c are unused.
 */
static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
{
	return bits;
}
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_ci/* Alan Modra's little-endian strlen tail for 64-bit */
778c2ecf20Sopenharmony_cistatic inline unsigned long create_zero_mask(unsigned long bits)
788c2ecf20Sopenharmony_ci{
798c2ecf20Sopenharmony_ci	unsigned long leading_zero_bits;
808c2ecf20Sopenharmony_ci	long trailing_zero_bit_mask;
818c2ecf20Sopenharmony_ci
828c2ecf20Sopenharmony_ci	asm("addi	%1,%2,-1\n\t"
838c2ecf20Sopenharmony_ci	    "andc	%1,%1,%2\n\t"
848c2ecf20Sopenharmony_ci	    "popcntd	%0,%1"
858c2ecf20Sopenharmony_ci		: "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask)
868c2ecf20Sopenharmony_ci		: "b" (bits));
878c2ecf20Sopenharmony_ci
888c2ecf20Sopenharmony_ci	return leading_zero_bits;
898c2ecf20Sopenharmony_ci}
908c2ecf20Sopenharmony_ci
/*
 * Convert the bit count produced by create_zero_mask() into a byte
 * index by dividing it by 8.
 */
static inline unsigned long find_zero(unsigned long mask)
{
	return mask >> 3;
}
958c2ecf20Sopenharmony_ci
/*
 * Turn a bit count into a mask with that many low-order bits set.
 *
 * This assumes that we never ask for an all 1s bitmask: @mask must be
 * smaller than the width of unsigned long, otherwise the shift below is
 * undefined.
 */
static inline unsigned long zero_bytemask(unsigned long mask)
{
	return (1UL << mask) - 1;
}
1018c2ecf20Sopenharmony_ci
1028c2ecf20Sopenharmony_ci#else	/* 32-bit case */
1038c2ecf20Sopenharmony_ci
/*
 * Constants consumed by the 32-bit little-endian has_zero().
 * The member order (one_bits first, then high_bits) must match the
 * positional initializer WORD_AT_A_TIME_CONSTANTS.
 */
struct word_at_a_time {
	const unsigned long one_bits, high_bits;
};
1078c2ecf20Sopenharmony_ci
/*
 * Positional initializer for the 32-bit little-endian struct
 * word_at_a_time: the first value initializes one_bits, the second
 * high_bits.  REPEAT_BYTE() is defined outside this file; assuming it
 * replicates its argument into every byte of an unsigned long, these are
 * 0x01...01 and 0x80...80.
 */
#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
1098c2ecf20Sopenharmony_ci
/*
 * This is largely generic for little-endian machines, but the
 * optimal byte mask counting is probably going to be something
 * that is architecture-specific. If you have a reliably fast
 * bit count instruction, that might be better than the multiply
 * and shift, for example.
 */
1178c2ecf20Sopenharmony_ci
/*
 * Carl Chatfield / Jan Achrenius G+ version for 32-bit.
 *
 * Maps a byte mask of the form 0x000000, 0x0000ff, 0x00ffff or 0xffffff
 * to the number of 0xff bytes it contains (0, 1, 2 or 3).
 */
static inline long count_masked_bytes(long mask)
{
	/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
	long a = (0x0ff0001+mask) >> 23;
	/* Fix the 1 for 00 case */
	return a & mask;
}
1268c2ecf20Sopenharmony_ci
/*
 * Turn the has_zero() bit mask into a byte mask.
 *
 * (bits - 1) & ~bits keeps exactly the bits below the lowest set bit of
 * @bits.  When that lowest set bit is the top bit of a byte, shifting
 * right by 7 leaves 0xff in every byte below that byte and zero elsewhere.
 */
static inline unsigned long create_zero_mask(unsigned long bits)
{
	bits = (bits - 1) & ~bits;
	return bits >> 7;
}
1328c2ecf20Sopenharmony_ci
/*
 * Convert the byte mask from create_zero_mask() into a byte index by
 * counting its 0xff bytes with count_masked_bytes().
 */
static inline unsigned long find_zero(unsigned long mask)
{
	return count_masked_bytes(mask);
}
1378c2ecf20Sopenharmony_ci
1388c2ecf20Sopenharmony_ci/* Return nonzero if it has a zero */
1398c2ecf20Sopenharmony_cistatic inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
1408c2ecf20Sopenharmony_ci{
1418c2ecf20Sopenharmony_ci	unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
1428c2ecf20Sopenharmony_ci	*bits = mask;
1438c2ecf20Sopenharmony_ci	return mask;
1448c2ecf20Sopenharmony_ci}
1458c2ecf20Sopenharmony_ci
/*
 * Nothing to prepare on 32-bit little-endian: the mask produced by
 * has_zero() is returned unchanged.  @a and @c are unused.
 */
static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
{
	return bits;
}
1508c2ecf20Sopenharmony_ci
/*
 * The mask we created is directly usable as a bytemask, so the argument
 * is returned unchanged.
 */
#define zero_bytemask(mask) (mask)
1538c2ecf20Sopenharmony_ci
1548c2ecf20Sopenharmony_ci#endif /* CONFIG_64BIT */
1558c2ecf20Sopenharmony_ci
1568c2ecf20Sopenharmony_ci#endif /* __BIG_ENDIAN__ */
1578c2ecf20Sopenharmony_ci
/*
 * We use load_unaligned_zeropad() in a selftest, which builds a userspace
 * program. Some linker scripts seem to discard the .fixup section, so allow
 * the test code to use a different section name by defining FIXUP_SECTION
 * before including this header.
 */
#ifndef FIXUP_SECTION
#define FIXUP_SECTION ".fixup"
#endif
1668c2ecf20Sopenharmony_ci
1678c2ecf20Sopenharmony_cistatic inline unsigned long load_unaligned_zeropad(const void *addr)
1688c2ecf20Sopenharmony_ci{
1698c2ecf20Sopenharmony_ci	unsigned long ret, offset, tmp;
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_ci	asm(
1728c2ecf20Sopenharmony_ci	"1:	" PPC_LL "%[ret], 0(%[addr])\n"
1738c2ecf20Sopenharmony_ci	"2:\n"
1748c2ecf20Sopenharmony_ci	".section " FIXUP_SECTION ",\"ax\"\n"
1758c2ecf20Sopenharmony_ci	"3:	"
1768c2ecf20Sopenharmony_ci#ifdef __powerpc64__
1778c2ecf20Sopenharmony_ci	"clrrdi		%[tmp], %[addr], 3\n\t"
1788c2ecf20Sopenharmony_ci	"clrlsldi	%[offset], %[addr], 61, 3\n\t"
1798c2ecf20Sopenharmony_ci	"ld		%[ret], 0(%[tmp])\n\t"
1808c2ecf20Sopenharmony_ci#ifdef __BIG_ENDIAN__
1818c2ecf20Sopenharmony_ci	"sld		%[ret], %[ret], %[offset]\n\t"
1828c2ecf20Sopenharmony_ci#else
1838c2ecf20Sopenharmony_ci	"srd		%[ret], %[ret], %[offset]\n\t"
1848c2ecf20Sopenharmony_ci#endif
1858c2ecf20Sopenharmony_ci#else
1868c2ecf20Sopenharmony_ci	"clrrwi		%[tmp], %[addr], 2\n\t"
1878c2ecf20Sopenharmony_ci	"clrlslwi	%[offset], %[addr], 30, 3\n\t"
1888c2ecf20Sopenharmony_ci	"lwz		%[ret], 0(%[tmp])\n\t"
1898c2ecf20Sopenharmony_ci#ifdef __BIG_ENDIAN__
1908c2ecf20Sopenharmony_ci	"slw		%[ret], %[ret], %[offset]\n\t"
1918c2ecf20Sopenharmony_ci#else
1928c2ecf20Sopenharmony_ci	"srw		%[ret], %[ret], %[offset]\n\t"
1938c2ecf20Sopenharmony_ci#endif
1948c2ecf20Sopenharmony_ci#endif
1958c2ecf20Sopenharmony_ci	"b	2b\n"
1968c2ecf20Sopenharmony_ci	".previous\n"
1978c2ecf20Sopenharmony_ci	EX_TABLE(1b, 3b)
1988c2ecf20Sopenharmony_ci	: [tmp] "=&b" (tmp), [offset] "=&r" (offset), [ret] "=&r" (ret)
1998c2ecf20Sopenharmony_ci	: [addr] "b" (addr), "m" (*(unsigned long *)addr));
2008c2ecf20Sopenharmony_ci
2018c2ecf20Sopenharmony_ci	return ret;
2028c2ecf20Sopenharmony_ci}
2038c2ecf20Sopenharmony_ci
2048c2ecf20Sopenharmony_ci#undef FIXUP_SECTION
2058c2ecf20Sopenharmony_ci
2068c2ecf20Sopenharmony_ci#endif /* _ASM_WORD_AT_A_TIME_H */
207