/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 ARM Ltd.
 * Copyright (C) 2013 Linaro.
 *
 * This code is based on the glibc cortex-strings work originally authored
 * by Linaro, which can be found at:
 *
 * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
 * files/head:/src/aarch64/
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * Calculate the length of a string.
 *
 * Parameters:
 *	x0 - const string pointer
 * Returns:
 *	x0 - the length of the string (the number of bytes before the
 *	     terminating NUL)
 */

/*
 * Symbolic register names used by the strlen routine in this file.
 */

/*
 * Arguments and results.
 * srcin and len both name x0: the routine writes len only in the
 * instruction that also performs the last read of srcin.
 */
srcin		.req	x0
len		.req	x0

/* Locals and temporaries.  */
/* src: read cursor, post-incremented by 16 on every pair load.  */
src		.req	x1
/* data1/data2: the two Dwords fetched by each pair load.  */
data1		.req	x2
data2		.req	x3
/* data2a: masked copy of data2, written only on the misaligned entry path.  */
data2a		.req	x4
/* has_nul1/has_nul2: NUL syndromes of data1/data2 (non-zero => NUL found).  */
has_nul1	.req	x5
has_nul2	.req	x6
/* tmp1..tmp4: scratch for the syndrome and alignment-mask calculations.  */
tmp1		.req	x7
tmp2		.req	x8
tmp3		.req	x9
tmp4		.req	x10
/* zeroones: holds REP8_01 for the whole routine.  */
zeroones	.req	x11
/* pos: bit position of the first NUL marker in the byte-reversed syndrome.  */
pos		.req	x12

/*
 * Byte patterns replicated across all eight bytes of a Dword.
 * REP8_01 is the value subtracted and REP8_7f the value OR-ed in by the
 * NUL-detection expression (X - 1) & ~(X | 0x7f).  REP8_80 is not
 * referenced by the code visible in this file.
 */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/*
 * strlen: return in len (x0) the number of bytes that precede the first
 * NUL byte of the string at srcin (x0).
 *
 * The string is scanned 16 bytes at a time with pair loads from 16-byte
 * aligned addresses.  When srcin is not 16-byte aligned, the first load
 * starts below srcin and the bytes that precede the string are forced to
 * 0xff so that they can never be taken for the terminator.
 *
 * The instructions visible here write only the registers named by the
 * aliases in this file, plus the condition flags.  The SYM_*, CPU_BE,
 * CPU_LE and EXPORT_SYMBOL_NOKASAN macros come from the included headers.
 */
SYM_FUNC_START_WEAK_PI(strlen)
	mov	zeroones, #REP8_01
	bic	src, srcin, #15		/* src = srcin rounded down to 16.  */
	ands	tmp1, srcin, #15	/* tmp1 = offset of srcin within it.  */
	b.ne	.Lmisaligned
	/*
	* NUL detection works on the principle that (X - 1) & (~X) & 0x80
	* (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	* can be done in parallel across the entire word.
	*/
	/*
	* The inner loop deals with two Dwords at a time. This has a
	* slightly higher start-up cost, but we should win quite quickly,
	* especially on cores with a high number of issue slots per
	* cycle, as we get much better parallelism out of the operations.
	*/
.Lloop:
	ldp	data1, data2, [src], #16
.Lrealigned:
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	sub	tmp3, data2, zeroones
	orr	tmp4, data2, #REP8_7f
	bic	has_nul1, tmp1, tmp2
	bics	has_nul2, tmp3, tmp4	/* Z set iff data2 holds no NUL.  */
	/*
	* If has_nul2 == 0, compare has_nul1 with 0; otherwise force the
	* flags to "not equal".  The branch is therefore taken only while
	* both syndromes are zero.
	*/
	ccmp	has_nul1, #0, #0, eq	/* NZCV = 0000  */
	b.eq	.Lloop

	/*
	* src already points 16 bytes past the start of the pair just
	* examined; the subtractions of 8 below step back to the Dword that
	* actually contains the NUL.
	*/
	sub	len, src, srcin
	cbz	has_nul1, .Lnul_in_data2
CPU_BE(	mov	data2, data1 )	/* Prepare data to re-calculate the syndrome.  */
	sub	len, len, #8		/* NUL is in data1: one more Dword back.  */
	mov	has_nul2, has_nul1
.Lnul_in_data2:
	/*
	* For big-endian, carry propagation (if the final byte in the
	* string is 0x01) means we cannot use has_nul directly.  The
	* easiest way to get the correct byte is to byte-swap the data
	* and calculate the syndrome a second time.
	*/
CPU_BE( rev	data2, data2 )
CPU_BE( sub	tmp1, data2, zeroones )
CPU_BE( orr	tmp2, data2, #REP8_7f )
CPU_BE( bic	has_nul2, tmp1, tmp2 )

	sub	len, len, #8		/* len = offset of the Dword with the NUL.  */
	rev	has_nul2, has_nul2	/* First byte in memory moves to the MSB.  */
	clz	pos, has_nul2		/* 8 * index of the first NUL byte.  */
	add	len, len, pos, lsr #3		/* Bits to bytes.  */
	ret

.Lmisaligned:
	/*
	* tmp1 holds the offset of srcin within its 16-byte block (1..15).
	* The flags set by this cmp are consumed by the csinv/csel at the
	* end of this path; no instruction in between modifies them.
	*/
	cmp	tmp1, #8
	neg	tmp1, tmp1
	ldp	data1, data2, [src], #16
	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
	mov	tmp2, #~0
	/* Big-endian.  Early bytes are at MSB.  */
CPU_BE( lsl	tmp2, tmp2, tmp1 )	/* Shift (tmp1 & 63).  */
	/* Little-endian.  Early bytes are at LSB.  */
CPU_LE( lsr	tmp2, tmp2, tmp1 )	/* Shift (tmp1 & 63).  */

	/*
	* tmp2 now has 0xff in the (offset mod 8) byte positions that come
	* first in memory; for an offset of exactly 8 the shift count is 0
	* and tmp2 is all ones.
	*
	* offset <= 8 (le): the string starts inside data1 or at the start
	*	of data2, so mask data1 and keep data2 as loaded.
	* offset >  8: data1 lies wholly before the string, so replace it
	*	with all ones and mask the leading bytes of data2 instead.
	*/
	orr	data1, data1, tmp2
	orr	data2a, data2, tmp2
	csinv	data1, data1, xzr, le
	csel	data2, data2, data2a, le
	b	.Lrealigned
SYM_FUNC_END_PI(strlen)
EXPORT_SYMBOL_NOKASAN(strlen)