18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * strlen() for PPC32
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Copyright (C) 2018 Christophe Leroy CS Systemes d'Information.
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci * Inspired from glibc implementation
88c2ecf20Sopenharmony_ci */
98c2ecf20Sopenharmony_ci#include <asm/ppc_asm.h>
108c2ecf20Sopenharmony_ci#include <asm/export.h>
118c2ecf20Sopenharmony_ci#include <asm/cache.h>
128c2ecf20Sopenharmony_ci
138c2ecf20Sopenharmony_ci	.text
148c2ecf20Sopenharmony_ci
158c2ecf20Sopenharmony_ci/*
168c2ecf20Sopenharmony_ci * Algorithm:
178c2ecf20Sopenharmony_ci *
188c2ecf20Sopenharmony_ci * 1) Given a word 'x', we can test to see if it contains any 0 bytes
198c2ecf20Sopenharmony_ci *    by subtracting 0x01010101, and seeing if any of the high bits of each
208c2ecf20Sopenharmony_ci *    byte changed from 0 to 1. This works because the least significant
218c2ecf20Sopenharmony_ci *    0 byte must have had no incoming carry (otherwise it's not the least
228c2ecf20Sopenharmony_ci *    significant), so it is 0x00 - 0x01 == 0xff. For all other
238c2ecf20Sopenharmony_ci *    byte values, either they have the high bit set initially, or when
248c2ecf20Sopenharmony_ci *    1 is subtracted you get a value in the range 0x00-0x7f, none of which
258c2ecf20Sopenharmony_ci *    have their high bit set. The expression here is
268c2ecf20Sopenharmony_ci *    ((x - 0x01010101) & ~x & 0x80808080), which gives 0x00000000 when
278c2ecf20Sopenharmony_ci *    there were no 0x00 bytes in the word.  You get 0x80 in bytes that
288c2ecf20Sopenharmony_ci *    match, but possibly false 0x80 matches in the next more significant
298c2ecf20Sopenharmony_ci *    byte to a true match due to carries.  For little-endian this is
308c2ecf20Sopenharmony_ci *    of no consequence since the least significant match is the one
318c2ecf20Sopenharmony_ci *    we're interested in, but big-endian needs method 2 to find which
328c2ecf20Sopenharmony_ci *    byte matches.
338c2ecf20Sopenharmony_ci * 2) Given a word 'x', we can test to see _which_ byte was zero by
348c2ecf20Sopenharmony_ci *    calculating ~(((x & ~0x80808080) - 0x80808080 - 1) | x | ~0x80808080).
358c2ecf20Sopenharmony_ci *    This produces 0x80 in each byte that was zero, and 0x00 in all
368c2ecf20Sopenharmony_ci *    the other bytes. The '| ~0x80808080' clears the low 7 bits in each
378c2ecf20Sopenharmony_ci *    byte, and the '| x' part ensures that bytes with the high bit set
388c2ecf20Sopenharmony_ci *    produce 0x00. The subtraction equals adding 0x7f7f7f7f (mod 2^32), which
398c2ecf20Sopenharmony_ci *    carries into the high bit of each byte iff that byte had one of its low
408c2ecf20Sopenharmony_ci *    7 bits set. We can then count the leading zero bits and divide by 8 to
418c2ecf20Sopenharmony_ci *    find how many to add to the index.
428c2ecf20Sopenharmony_ci *    This is from the book 'The PowerPC Compiler Writer's Guide',
438c2ecf20Sopenharmony_ci *    by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
448c2ecf20Sopenharmony_ci */
458c2ecf20Sopenharmony_ci
/*
 * strlen: on return r3 holds the number of bytes that precede the first
 * 0 byte of the string whose address was passed in r3.
 *
 * The string is scanned one aligned 32-bit word at a time, so whole words
 * are loaded: bytes sharing a word with the start or the end of the string
 * are read as well.
 *
 * Register use:
 *   r3  - in: string address, out: length
 *   r0  - (string address & 3), later multiplied by 8 to give a bit count
 *   r6  - 0x80808080 (himagic)
 *   r7  - 0x01010101 (lomagic)
 *   r8  - scratch
 *   r9  - word under test
 *   r10 - address of the word under test (advanced by lwzu)
 *
 * CR0 is modified. XER[CA] is cleared by the addic and has to remain clear
 * until the subfe, which adds it into its result.
 */
468c2ecf20Sopenharmony_ci_GLOBAL(strlen)
478c2ecf20Sopenharmony_ci	andi.   r0, r3, 3	/* r0 = misalignment of the string, CR0 = (r0 == 0) */
488c2ecf20Sopenharmony_ci	lis	r7, 0x0101	/* r7 = 0x01010000 */
498c2ecf20Sopenharmony_ci	addi	r10, r3, -4	/* bias by -4: lwzu adds 4 before each load */
508c2ecf20Sopenharmony_ci	addic	r7, r7, 0x0101	/* r7 = 0x01010101 (lomagic) & clear XER[CA] */
518c2ecf20Sopenharmony_ci	rotlwi	r6, r7, 31 	/* r6 = 0x80808080 (himagic) */
528c2ecf20Sopenharmony_ci	bne-	3f	/* string not word aligned: mask the first word at 3: */
538c2ecf20Sopenharmony_ci	.balign IFETCH_ALIGN_BYTES	/* align the entry of the scan loop */
548c2ecf20Sopenharmony_ci1:	lwzu	r9, 4(r10)	/* r10 += 4, r9 = word at r10 */
558c2ecf20Sopenharmony_ci2:	subf	r8, r7, r9	/* r8 = x - lomagic (x being the word in r9) */
568c2ecf20Sopenharmony_ci	and.	r8, r8, r6	/* r8 = (x - lomagic) & himagic */
578c2ecf20Sopenharmony_ci	beq+	1b	/* no high bit left: no 0 byte, load the next word */
588c2ecf20Sopenharmony_ci	andc.	r8, r8, r9	/* r8 = (x - lomagic) & himagic & ~x (method 1) */
598c2ecf20Sopenharmony_ci	beq+	1b	/* the high bits were already set in x: next word */
608c2ecf20Sopenharmony_ci	andc	r8, r9, r6	/* method 2 starts here: r8 = x & ~himagic */
618c2ecf20Sopenharmony_ci	orc	r9, r9, r6	/* r9 = x | ~himagic */
628c2ecf20Sopenharmony_ci	subfe	r8, r6, r8	/* r8 = r8 + ~himagic + CA, with CA == 0 from the addic */
638c2ecf20Sopenharmony_ci	nor	r8, r8, r9	/* r8 = 0x80 in each byte of x that is 0, else 0x00 */
648c2ecf20Sopenharmony_ci	cntlzw	r8, r8	/* r8 = number of zero bits ahead of the first 0x80 */
658c2ecf20Sopenharmony_ci	subf	r3, r3, r10	/* r3 = address of the current word - string address */
668c2ecf20Sopenharmony_ci	srwi	r8, r8, 3	/* bits -> bytes: index of the 0 byte within the word */
678c2ecf20Sopenharmony_ci	add	r3, r3, r8	/* r3 = length of the string */
688c2ecf20Sopenharmony_ci	blr
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_ci	/* Misaligned string: make sure bytes before string are seen not 0 */
718c2ecf20Sopenharmony_ci3:	xor	r10, r10, r0	/* clear the two low bits of r10 (they equal r0) */
728c2ecf20Sopenharmony_ci	orc	r8, r8, r8	/* r8 = r8 | ~r8 = 0xffffffff */
738c2ecf20Sopenharmony_ci	lwzu	r9, 4(r10)	/* r9 = aligned word containing the start of the string */
748c2ecf20Sopenharmony_ci	slwi	r0, r0, 3	/* r0 = number of bits in that word ahead of the string */
758c2ecf20Sopenharmony_ci	srw	r8, r8, r0	/* r8 = 0 in those leading bits, 1 in the others */
768c2ecf20Sopenharmony_ci	orc	r9, r9, r8	/* r9 |= ~r8: set every bit that lies ahead of the string */
778c2ecf20Sopenharmony_ci	b	2b	/* join the common path with the patched word */
788c2ecf20Sopenharmony_ciEXPORT_SYMBOL(strlen)
79