xref: /kernel/linux/linux-5.10/arch/alpha/lib/strlen.S (revision 8c2ecf20)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * strlen.S (c) 1995 David Mosberger (davidm@cs.arizona.edu)
 *
 * Finds length of a 0-terminated string.  Optimized for the
 * Alpha architecture:
 *
 *	- memory accessed as aligned quadwords only
 *	- uses cmpbge to compare 8 bytes in parallel
 *	- does binary search to find 0 byte in last
 *	  quadword (HAKMEM needed 12 instructions to
 *	  do this instead of the 9 instructions that
 *	  binary search needs).
 */
#include <asm/export.h>
	.set noreorder
	.set noat

	.align 3

/*
 * strlen: count the bytes that precede the first 0 byte of a string.
 *
 * In:		$16 = address of the string (any alignment)
 * Out:		$0  = length in bytes, terminating 0 not counted
 * Scratch:	$1, $2, $3
 * Returns through $26.
 *
 * The string is read one aligned quadword at a time.  The first
 * quadword may begin before the string does, so the bytes in front
 * of the string are forced to 0xff before the quadword is tested;
 * they can then never be mistaken for the terminator.
 */
	.globl	strlen
	.ent	strlen

strlen:
	ldq_u	$1, 0($16)	# load first quadword ($16 may be misaligned)
	lda	$2, -1($31)	# $2 = all ones
	insqh	$2, $16, $2	# $2 = 0xff in every byte below offset ($16 & 7)
	andnot	$16, 7, $0	# $0 = address of the quadword just loaded
	or	$2, $1, $1	# bytes in front of the string become non-zero
	cmpbge	$31, $1, $2	# $2  <- bitmask: bit i == 1 <==> i-th byte == 0
	bne	$2, found

	/* No 0 byte so far: walk the following aligned quadwords. */
loop:	ldq	$1, 8($0)
	addq	$0, 8, $0	# addr += 8
	nop			# helps dual issue last two insns
	cmpbge	$31, $1, $2
	beq	$2, loop

	/*
	 * Here $0 is the address of the quadword that holds the 0 byte
	 * and $2 is the non-zero bitmask of the 0 bytes in it.  If bit 0
	 * is set the 0 byte is the first byte of that quadword and $0
	 * already points at it.
	 */
found:	blbs	$2, done	# make aligned case fast
	negq	$2, $3
	and	$2, $3, $2	# $2 & -$2: keep only the lowest set bit

	/*
	 * Binary search for the index of that single bit.  Each step
	 * tests the bit against a mask and, when the bit lies outside
	 * the mask, advances $0 by 4, 2 or 1.
	 */
	and	$2, 0x0f, $1	# bit among bytes 0-3?
	addq	$0, 4, $3
	cmoveq	$1, $3, $0	# no: $0 += 4

	and	$2, 0x33, $1	# bit among bytes 0, 1, 4, 5?
	addq	$0, 2, $3
	cmoveq	$1, $3, $0	# no: $0 += 2

	and	$2, 0x55, $1	# bit at an even byte index?
	addq	$0, 1, $3
	cmoveq	$1, $3, $0	# no: $0 += 1

done:	subq	$0, $16, $0	# length = address of the 0 byte - start of string
	ret	$31, ($26)

	.end	strlen
	EXPORT_SYMBOL(strlen)