/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 ARM Ltd.
 * Copyright (C) 2013 Linaro.
 *
 * This code is based on the glibc cortex-strings work originally authored by
 * Linaro, which can be found at:
 *
 * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
 * files/head:/src/aarch64/
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * Determine the length of a string, examining at most a fixed number of
 * bytes.
 *
 * Parameters:
 *	x0 - const string pointer
 *	x1 - maximal string length
 * Returns:
 *	x0 - the number of bytes preceding the first NUL byte, or the maximal
 *	     length (x1) if no NUL byte is found within that many bytes
 *
 * Clobbers:
 *	x2-x14 and the condition flags.  x1 is left unmodified.
 *
 * Notes:
 *	The string is read in 16-byte chunks from 16-byte aligned addresses,
 *	so the loads may touch bytes that lie before the start of the string
 *	or after its end, within the same aligned 16-byte chunk.
 */

/* Arguments and results.  */
srcin		.req	x0
len		.req	x0	/* Shares x0 with srcin: srcin is dead once len is written.  */
limit		.req	x1

/* Locals and temporaries.  */
src		.req	x2	/* Aligned read cursor, post-incremented by 16 per load.  */
data1		.req	x3	/* First Dword of the current chunk.  */
data2		.req	x4	/* Second Dword of the current chunk.  */
data2a		.req	x5	/* Masked copy of data2 (misaligned start only).  */
has_nul1	.req	x6	/* NUL syndrome for data1.  */
has_nul2	.req	x7	/* NUL syndrome for data2.  */
tmp1		.req	x8
tmp2		.req	x9
tmp3		.req	x10
tmp4		.req	x11
zeroones	.req	x12	/* Holds REP8_01.  */
pos		.req	x13	/* Bit position of the first NUL in the syndrome.  */
limit_wd	.req	x14	/* Remaining 16-byte chunks, minus one.  */

#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

SYM_FUNC_START(__pi_strnlen)
	cbz	limit, .Lhit_limit	/* limit == 0: return 0 without reading.  */
	mov	zeroones, #REP8_01
	bic	src, srcin, #15		/* src = srcin rounded down to 16 bytes.  */
	ands	tmp1, srcin, #15	/* tmp1 = offset of srcin within its chunk.  */
	b.ne	.Lmisaligned
	/*
	* Aligned start: limit_wd = number of 16-byte chunks needed to cover
	* limit bytes, minus one.
	*/
	sub	limit_wd, limit, #1 /* Limit != 0, so no underflow.  */
	lsr	limit_wd, limit_wd, #4  /* Convert to Qwords.  */

	/*
	* NUL detection works on the principle that (X - 1) & (~X) & 0x80
	* (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	* can be done in parallel across the entire word.
	*/
	/*
	* The inner loop deals with two Dwords at a time.  This has a
	* slightly higher start-up cost, but we should win quite quickly,
	* especially on cores with a high number of issue slots per
	* cycle, as we get much better parallelism out of the operations.
	*/
.Lloop:
	ldp	data1, data2, [src], #16
.Lrealigned:
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	sub	tmp3, data2, zeroones
	orr	tmp4, data2, #REP8_7f
	bic	has_nul1, tmp1, tmp2
	bic	has_nul2, tmp3, tmp4
	subs	limit_wd, limit_wd, #1	/* Result goes negative after the last chunk.  */
	orr	tmp1, has_nul1, has_nul2
	/*
	* While the chunk counter is still non-negative (pl), compare the
	* combined syndrome with zero; otherwise force NZCV = 0000 so that
	* the "eq" test below fails and the loop terminates.
	*/
	ccmp	tmp1, #0, #0, pl    /* NZCV = 0000  */
	b.eq	.Lloop

	cbz	tmp1, .Lhit_limit   /* No null in final Qword.  */

	/*
	* We know there's a null in the final Qword. The easiest thing
	* to do now is work out the length of the string and return
	* MIN (len, limit).
	*/
	sub	len, src, srcin		/* src already points past the chunk.  */
	cbz	has_nul1, .Lnul_in_data2
CPU_BE( mov	data2, data1 )	/* Prepare data to re-calculate the syndrome.  */

	/* The NUL is in data1: step back over data2 and use data1's syndrome.  */
	sub	len, len, #8
	mov	has_nul2, has_nul1
.Lnul_in_data2:
	/*
	* For big-endian, carry propagation (if the final byte in the
	* string is 0x01) means we cannot use has_nul directly.  The
	* easiest way to get the correct byte is to byte-swap the data
	* and calculate the syndrome a second time.
	*/
CPU_BE( rev	data2, data2 )
CPU_BE( sub	tmp1, data2, zeroones )
CPU_BE( orr	tmp2, data2, #REP8_7f )
CPU_BE( bic	has_nul2, tmp1, tmp2 )

	sub	len, len, #8		/* len = offset of the Dword holding the NUL.  */
	/*
	* Byte-reverse the syndrome so the marker for the lowest-addressed
	* NUL byte becomes the most significant set bit, then count the
	* leading zero bits to locate it.
	*/
	rev	has_nul2, has_nul2
	clz	pos, has_nul2
	add	len, len, pos, lsr #3       /* Bits to bytes.  */
	cmp	len, limit
	csel	len, len, limit, ls     /* Return the lower value.  */
	ret

.Lmisaligned:
	/*
	* Deal with a partial first word.
	* We're doing two things in parallel here;
	* 1) Calculate the number of words (but avoiding overflow if
	* limit is near ULONG_MAX) - to do this we need to work out
	* limit + tmp1 - 1 as a 65-bit value before shifting it;
	* 2) Load and mask the initial data words - we force the bytes
	* before the ones we are interested in to 0xff - this ensures
	* early bytes will not hit any zero detection.
	*/
	ldp	data1, data2, [src], #16

	/*
	* limit_wd = (limit - 1 + tmp1) >> 4, computed as the sum of the
	* shifted high part and the carry out of the low four bits so that
	* the intermediate sum cannot overflow.
	*/
	sub	limit_wd, limit, #1
	and	tmp3, limit_wd, #15
	lsr	limit_wd, limit_wd, #4

	add	tmp3, tmp3, tmp1
	add	limit_wd, limit_wd, tmp3, lsr #4

	neg	tmp4, tmp1
	lsl	tmp4, tmp4, #3  /* Bytes beyond alignment -> bits.  */

	/*
	* Build a mask in tmp2 with 0xff in each byte that precedes the
	* start of the string within a Dword.
	*/
	mov	tmp2, #~0
	/* Big-endian.  Early bytes are at MSB.  */
CPU_BE( lsl	tmp2, tmp2, tmp4 )	/* Shift by (tmp4 & 63).  */
	/* Little-endian.  Early bytes are at LSB.  */
CPU_LE( lsr	tmp2, tmp2, tmp4 )	/* Shift by (tmp4 & 63).  */

	cmp	tmp1, #8

	orr	data1, data1, tmp2
	orr	data2a, data2, tmp2

	/*
	* Offset <= 8: keep the masked data1 and leave data2 untouched.
	* Offset >  8: data1 lies entirely before the string, so force it
	* to all-ones, and use the masked copy of data2 instead.
	*/
	csinv	data1, data1, xzr, le
	csel	data2, data2, data2a, le
	b	.Lrealigned

.Lhit_limit:
	/* No NUL within the first limit bytes (or limit == 0).  */
	mov	len, limit
	ret
SYM_FUNC_END(__pi_strnlen)

SYM_FUNC_ALIAS_WEAK(strnlen, __pi_strnlen)
EXPORT_SYMBOL_NOKASAN(strnlen)
163