162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * strlen.S (c) 1995 David Mosberger (davidm@cs.arizona.edu)
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Finds length of a 0-terminated string.  Optimized for the
662306a36Sopenharmony_ci * Alpha architecture:
762306a36Sopenharmony_ci *
862306a36Sopenharmony_ci *	- memory accessed as aligned quadwords only
962306a36Sopenharmony_ci *	- uses cmpbge to compare 8 bytes in parallel
1062306a36Sopenharmony_ci *	- does binary search to find 0 byte in last
1162306a36Sopenharmony_ci *	  quadword (HAKMEM needed 12 instructions to
1262306a36Sopenharmony_ci *	  do this instead of the 9 instructions that
1362306a36Sopenharmony_ci *	  binary search needs).
1462306a36Sopenharmony_ci */
1562306a36Sopenharmony_ci#include <linux/export.h>
/*
 * Assembler setup.  The instruction sequence below is hand-scheduled:
 *   noreorder - ask the assembler to keep the instructions in the
 *               order written
 *   noat      - assembler macros may not silently use the $at
 *               temporary register
 * NOTE(review): semantics of both .set options are as described in the
 * assembler documentation; confirm for the toolchain in use.
 */
1662306a36Sopenharmony_ci	.set noreorder
1762306a36Sopenharmony_ci	.set noat
1862306a36Sopenharmony_ci
1962306a36Sopenharmony_ci	.align 3		# entry point on a 2^3 = 8 byte boundary
2062306a36Sopenharmony_ci
/*
 * strlen - count the bytes that precede the terminating 0 byte.
 *
 * In:      $16 = address of the string (any alignment)
 * Out:     $0  = number of bytes before the first 0 byte
 * Scratch: $1, $2, $3
 * Returns through $26.  Only loads are issued; nothing is stored.
 *
 * Memory is always read as whole aligned quadwords, so bytes that sit
 * in the same aligned quadword before the start of the string or after
 * its terminator are read as well (and then ignored).
 *
 * Outline:
 *   1. Load the aligned quadword holding the first byte and force the
 *      bytes in front of the string to 0xff so they cannot be taken
 *      for a terminator.
 *   2. Scan one quadword at a time; cmpbge against zero produces an
 *      8-bit mask with bit i set when byte i is 0.
 *   3. Isolate the lowest set bit of the mask and turn its position
 *      (0..7) into a byte offset with a three-step binary search.
 *   4. Length = address of the 0 byte - start address.
 */
2162306a36Sopenharmony_ci	.globl	strlen
2262306a36Sopenharmony_ci	.ent	strlen
2362306a36Sopenharmony_ci
2462306a36Sopenharmony_cistrlen:
2562306a36Sopenharmony_ci	ldq_u	$1, 0($16)	# load first quadword ($16  may be misaligned)
2662306a36Sopenharmony_ci	lda	$2, -1($31)	# $2 = all ones ($31 reads as zero)
2762306a36Sopenharmony_ci	insqh	$2, $16, $2	# $2 = 0xff in the ($16 & 7) low bytes, 0 elsewhere
2862306a36Sopenharmony_ci	andnot	$16, 7, $0	# $0 = address of the aligned quadword just loaded
2962306a36Sopenharmony_ci	or	$2, $1, $1	# bytes in front of the string become 0xff (non-zero)
3062306a36Sopenharmony_ci	cmpbge	$31, $1, $2	# $2  <- bitmask: bit i == 1 <==> i-th byte == 0
3162306a36Sopenharmony_ci	bne	$2, found	# terminator is already in the first quadword
3262306a36Sopenharmony_ci
/* Main scan: $0 is the address of the last quadword examined. */
3362306a36Sopenharmony_ciloop:	ldq	$1, 8($0)	# fetch the next aligned quadword
3462306a36Sopenharmony_ci	addq	$0, 8, $0	# addr += 8
3562306a36Sopenharmony_ci	nop			# helps dual issue last two insns
3662306a36Sopenharmony_ci	cmpbge	$31, $1, $2	# $2 <- zero-byte bitmask for this quadword
3762306a36Sopenharmony_ci	beq	$2, loop	# no 0 byte here, keep scanning
3862306a36Sopenharmony_ci
/*
 * Here $0 = address of the quadword holding the terminator and $2 is a
 * non-zero mask of its 0 bytes.  Advance $0 to the first 0 byte.
 */
3962306a36Sopenharmony_cifound:	blbs	$2, done	# make aligned case fast: bit 0 set, $0 is already the 0 byte
4062306a36Sopenharmony_ci	negq	$2, $3		# $3 = -$2
4162306a36Sopenharmony_ci	and	$2, $3, $2	# $2 & -$2: keep only the lowest set bit (first 0 byte)
4262306a36Sopenharmony_ci
/* Binary search on the single remaining bit; each step is and/addq/cmoveq. */
4362306a36Sopenharmony_ci	and	$2, 0x0f, $1	# $1 == 0 <==> the bit is in positions 4..7
4462306a36Sopenharmony_ci	addq	$0, 4, $3	# candidate address: $0 + 4
4562306a36Sopenharmony_ci	cmoveq	$1, $3, $0	# take the candidate only when $1 == 0
4662306a36Sopenharmony_ci
4762306a36Sopenharmony_ci	and	$2, 0x33, $1	# $1 == 0 <==> position has its 2s bit set (2,3,6,7)
4862306a36Sopenharmony_ci	addq	$0, 2, $3	# candidate address: $0 + 2
4962306a36Sopenharmony_ci	cmoveq	$1, $3, $0	# take the candidate only when $1 == 0
5062306a36Sopenharmony_ci
5162306a36Sopenharmony_ci	and	$2, 0x55, $1	# $1 == 0 <==> position is odd (1,3,5,7)
5262306a36Sopenharmony_ci	addq	$0, 1, $3	# candidate address: $0 + 1
5362306a36Sopenharmony_ci	cmoveq	$1, $3, $0	# take the candidate only when $1 == 0
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_cidone:	subq	$0, $16, $0	# length = address of the 0 byte - start of string
5662306a36Sopenharmony_ci	ret	$31, ($26)	# return to the address in $26, result in $0
5762306a36Sopenharmony_ci
5862306a36Sopenharmony_ci	.end	strlen
5962306a36Sopenharmony_ci	EXPORT_SYMBOL(strlen)
60