162306a36Sopenharmony_ci#ifndef _ASM_WORD_AT_A_TIME_H
262306a36Sopenharmony_ci#define _ASM_WORD_AT_A_TIME_H
362306a36Sopenharmony_ci
462306a36Sopenharmony_ci/*
562306a36Sopenharmony_ci * Word-at-a-time interfaces for PowerPC.
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include <linux/kernel.h>
962306a36Sopenharmony_ci#include <asm/asm-compat.h>
1062306a36Sopenharmony_ci#include <asm/extable.h>
1162306a36Sopenharmony_ci
1262306a36Sopenharmony_ci#ifdef __BIG_ENDIAN__
1362306a36Sopenharmony_ci
/*
 * Per-call constants for the big-endian zero-byte detection helpers below.
 * WORD_AT_A_TIME_CONSTANTS is the initialiser: high_bits becomes
 * REPEAT_BYTE(0xfe) + 1 and low_bits becomes REPEAT_BYTE(0x7f).
 */
struct word_at_a_time {
	const unsigned long high_bits;
	const unsigned long low_bits;
};

#define WORD_AT_A_TIME_CONSTANTS {		\
	.high_bits = REPEAT_BYTE(0xfe) + 1,	\
	.low_bits = REPEAT_BYTE(0x7f),		\
}
1962306a36Sopenharmony_ci
2062306a36Sopenharmony_ci/* Bit set in the bytes that have a zero */
2162306a36Sopenharmony_cistatic inline long prep_zero_mask(unsigned long val, unsigned long rhs, const struct word_at_a_time *c)
2262306a36Sopenharmony_ci{
2362306a36Sopenharmony_ci	unsigned long mask = (val & c->low_bits) + c->low_bits;
2462306a36Sopenharmony_ci	return ~(mask | rhs);
2562306a36Sopenharmony_ci}
2662306a36Sopenharmony_ci
/* Big-endian: no conversion step, the mask is passed through unchanged. */
#define create_zero_mask(bits) (bits)
2862306a36Sopenharmony_ci
2962306a36Sopenharmony_cistatic inline long find_zero(unsigned long mask)
3062306a36Sopenharmony_ci{
3162306a36Sopenharmony_ci	long leading_zero_bits;
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_ci	asm (PPC_CNTLZL "%0,%1" : "=r" (leading_zero_bits) : "r" (mask));
3462306a36Sopenharmony_ci	return leading_zero_bits >> 3;
3562306a36Sopenharmony_ci}
3662306a36Sopenharmony_ci
3762306a36Sopenharmony_cistatic inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
3862306a36Sopenharmony_ci{
3962306a36Sopenharmony_ci	unsigned long rhs = val | c->low_bits;
4062306a36Sopenharmony_ci	*data = rhs;
4162306a36Sopenharmony_ci	return (val + c->high_bits) & ~rhs;
4262306a36Sopenharmony_ci}
4362306a36Sopenharmony_ci
/*
 * Return a word in which every bit strictly above the highest set bit of
 * mask is set and all other bits are clear.  __fls() is taken to return the
 * bit index of the most significant set bit.
 */
static inline unsigned long zero_bytemask(unsigned long mask)
{
	unsigned long top_bit = __fls(mask);

	return ~1ul << top_bit;
}
4862306a36Sopenharmony_ci
4962306a36Sopenharmony_ci#else
5062306a36Sopenharmony_ci
5162306a36Sopenharmony_ci#ifdef CONFIG_64BIT
5262306a36Sopenharmony_ci
/*
 * Little-endian 64-bit: has_zero() and prep_zero_mask() below never read
 * their constants argument, so the structure has no members.
 */
struct word_at_a_time {
};

#define WORD_AT_A_TIME_CONSTANTS { }
5862306a36Sopenharmony_ci
5962306a36Sopenharmony_ci/* This will give us 0xff for a NULL char and 0x00 elsewhere */
6062306a36Sopenharmony_cistatic inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
6162306a36Sopenharmony_ci{
6262306a36Sopenharmony_ci	unsigned long ret;
6362306a36Sopenharmony_ci	unsigned long zero = 0;
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_ci	asm("cmpb %0,%1,%2" : "=r" (ret) : "r" (a), "r" (zero));
6662306a36Sopenharmony_ci	*bits = ret;
6762306a36Sopenharmony_ci
6862306a36Sopenharmony_ci	return ret;
6962306a36Sopenharmony_ci}
7062306a36Sopenharmony_ci
/*
 * Nothing to prepare here: bits is returned unchanged, and neither a nor c
 * is looked at.
 */
static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
{
	(void)a;
	(void)c;

	return bits;
}
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_ci/* Alan Modra's little-endian strlen tail for 64-bit */
7762306a36Sopenharmony_cistatic inline unsigned long create_zero_mask(unsigned long bits)
7862306a36Sopenharmony_ci{
7962306a36Sopenharmony_ci	unsigned long leading_zero_bits;
8062306a36Sopenharmony_ci	long trailing_zero_bit_mask;
8162306a36Sopenharmony_ci
8262306a36Sopenharmony_ci	asm("addi	%1,%2,-1\n\t"
8362306a36Sopenharmony_ci	    "andc	%1,%1,%2\n\t"
8462306a36Sopenharmony_ci	    "popcntd	%0,%1"
8562306a36Sopenharmony_ci		: "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask)
8662306a36Sopenharmony_ci		: "b" (bits));
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci	return leading_zero_bits;
8962306a36Sopenharmony_ci}
9062306a36Sopenharmony_ci
/*
 * mask is a count of bits (as returned by create_zero_mask() in this
 * configuration); convert it to a count of whole bytes.
 */
static inline unsigned long find_zero(unsigned long mask)
{
	unsigned long whole_bytes = mask >> 3;

	return whole_bytes;
}
9562306a36Sopenharmony_ci
/*
 * mask is a bit count; return a word with that many low-order bits set.
 * This assumes that we never ask for an all 1s bitmask, as that would need
 * a shift by the full word width.
 */
static inline unsigned long zero_bytemask(unsigned long mask)
{
	unsigned long first_excluded_bit = 1UL << mask;

	return first_excluded_bit - 1;
}
10162306a36Sopenharmony_ci
10262306a36Sopenharmony_ci#else	/* 32-bit case */
10362306a36Sopenharmony_ci
/*
 * Constants for the little-endian 32-bit has_zero() below.
 * WORD_AT_A_TIME_CONSTANTS is the initialiser: one_bits becomes
 * REPEAT_BYTE(0x01) and high_bits becomes REPEAT_BYTE(0x80).
 */
struct word_at_a_time {
	const unsigned long one_bits;
	const unsigned long high_bits;
};

#define WORD_AT_A_TIME_CONSTANTS {		\
	.one_bits = REPEAT_BYTE(0x01),		\
	.high_bits = REPEAT_BYTE(0x80),		\
}
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_ci/*
11162306a36Sopenharmony_ci * This is largely generic for little-endian machines, but the
11262306a36Sopenharmony_ci * optimal byte mask counting is probably going to be something
11362306a36Sopenharmony_ci * that is architecture-specific. If you have a reliably fast
11462306a36Sopenharmony_ci * bit count instruction, that might be better than the multiply
11562306a36Sopenharmony_ci * and shift, for example.
11662306a36Sopenharmony_ci */
11762306a36Sopenharmony_ci
/*
 * Carl Chatfield / Jan Achrenius G+ version for 32-bit.
 *
 * Maps a bytemask of 0x000000, 0x0000ff, 0x00ffff or 0xffffff to the number
 * of 0xff bytes it contains (0, 1, 2 or 3).
 */
static inline long count_masked_bytes(long mask)
{
	/* After the shift: (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
	long biased = mask + 0x0ff0001;
	long count = biased >> 23;

	/* AND-ing with mask turns the 1 of the all-zero case into 0 */
	return count & mask;
}
12662306a36Sopenharmony_ci
/*
 * Turn bits into a mask of everything below its lowest set bit, then shift
 * that mask right by seven.  For an input whose lowest set bit is the top
 * bit of a byte, the result is 0xff in each lower byte and zero elsewhere.
 */
static inline unsigned long create_zero_mask(unsigned long bits)
{
	unsigned long below_lowest = ~bits & (bits - 1);

	return below_lowest >> 7;
}
13262306a36Sopenharmony_ci
/*
 * Thin wrapper: the result is whatever count_masked_bytes() reports for
 * mask, converted to unsigned long.
 */
static inline unsigned long find_zero(unsigned long mask)
{
	return (unsigned long)count_masked_bytes((long)mask);
}
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci/* Return nonzero if it has a zero */
13962306a36Sopenharmony_cistatic inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
14062306a36Sopenharmony_ci{
14162306a36Sopenharmony_ci	unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
14262306a36Sopenharmony_ci	*bits = mask;
14362306a36Sopenharmony_ci	return mask;
14462306a36Sopenharmony_ci}
14562306a36Sopenharmony_ci
/*
 * No preparation step in this configuration: bits comes back unchanged and
 * neither a nor c is examined.
 */
static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
{
	(void)a;
	(void)c;

	return bits;
}
15062306a36Sopenharmony_ci
/* The mask we created is already usable as a bytemask, so hand it back as-is */
#define zero_bytemask(bytemask) (bytemask)
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_ci#endif /* CONFIG_64BIT */
15562306a36Sopenharmony_ci
15662306a36Sopenharmony_ci#endif /* __BIG_ENDIAN__ */
15762306a36Sopenharmony_ci
/*
 * load_unaligned_zeropad() is also used in a selftest, which builds a
 * userspace program. Some linker scripts seem to discard the .fixup section,
 * so the test code may define FIXUP_SECTION to a different section name
 * before this point; otherwise the default below is used.
 */
#if !defined(FIXUP_SECTION)
#define FIXUP_SECTION ".fixup"
#endif
16662306a36Sopenharmony_ci
16762306a36Sopenharmony_cistatic inline unsigned long load_unaligned_zeropad(const void *addr)
16862306a36Sopenharmony_ci{
16962306a36Sopenharmony_ci	unsigned long ret, offset, tmp;
17062306a36Sopenharmony_ci
17162306a36Sopenharmony_ci	asm(
17262306a36Sopenharmony_ci	"1:	" PPC_LL "%[ret], 0(%[addr])\n"
17362306a36Sopenharmony_ci	"2:\n"
17462306a36Sopenharmony_ci	".section " FIXUP_SECTION ",\"ax\"\n"
17562306a36Sopenharmony_ci	"3:	"
17662306a36Sopenharmony_ci#ifdef __powerpc64__
17762306a36Sopenharmony_ci	"clrrdi		%[tmp], %[addr], 3\n\t"
17862306a36Sopenharmony_ci	"clrlsldi	%[offset], %[addr], 61, 3\n\t"
17962306a36Sopenharmony_ci	"ld		%[ret], 0(%[tmp])\n\t"
18062306a36Sopenharmony_ci#ifdef __BIG_ENDIAN__
18162306a36Sopenharmony_ci	"sld		%[ret], %[ret], %[offset]\n\t"
18262306a36Sopenharmony_ci#else
18362306a36Sopenharmony_ci	"srd		%[ret], %[ret], %[offset]\n\t"
18462306a36Sopenharmony_ci#endif
18562306a36Sopenharmony_ci#else
18662306a36Sopenharmony_ci	"clrrwi		%[tmp], %[addr], 2\n\t"
18762306a36Sopenharmony_ci	"clrlslwi	%[offset], %[addr], 30, 3\n\t"
18862306a36Sopenharmony_ci	"lwz		%[ret], 0(%[tmp])\n\t"
18962306a36Sopenharmony_ci#ifdef __BIG_ENDIAN__
19062306a36Sopenharmony_ci	"slw		%[ret], %[ret], %[offset]\n\t"
19162306a36Sopenharmony_ci#else
19262306a36Sopenharmony_ci	"srw		%[ret], %[ret], %[offset]\n\t"
19362306a36Sopenharmony_ci#endif
19462306a36Sopenharmony_ci#endif
19562306a36Sopenharmony_ci	"b	2b\n"
19662306a36Sopenharmony_ci	".previous\n"
19762306a36Sopenharmony_ci	EX_TABLE(1b, 3b)
19862306a36Sopenharmony_ci	: [tmp] "=&b" (tmp), [offset] "=&r" (offset), [ret] "=&r" (ret)
19962306a36Sopenharmony_ci	: [addr] "b" (addr), "m" (*(unsigned long *)addr));
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_ci	return ret;
20262306a36Sopenharmony_ci}
20362306a36Sopenharmony_ci
20462306a36Sopenharmony_ci#undef FIXUP_SECTION
20562306a36Sopenharmony_ci
20662306a36Sopenharmony_ci#endif /* _ASM_WORD_AT_A_TIME_H */
207