/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 ARM Ltd.
 * Copyright (C) 2013 Linaro.
 *
 * This code is based on the glibc cortex strings work originally authored
 * by Linaro, which can be found at:
 *
 * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
 * files/head:/src/aarch64/
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * Determine the length of a fixed-size string.
 *
 * Parameters:
 *	x0 - const string pointer
 *	x1 - maximal string length
 * Returns:
 *	x0 - the length of the string, never more than x1
 *
 * Uses x2-x14 as temporaries and modifies the condition flags.
 *
 * Data is always fetched as whole 16-byte blocks from a 16-byte aligned
 * address, so bytes that precede the string start, or that lie beyond the
 * limit, inside the first/last block are loaded as well.
 */

/* Arguments and results. */
srcin		.req	x0
len		.req	x0	/* Shares x0 with srcin: srcin is dead once len is written. */
limit		.req	x1

/* Locals and temporaries. */
src		.req	x2
data1		.req	x3
data2		.req	x4
data2a		.req	x5
has_nul1	.req	x6
has_nul2	.req	x7
tmp1		.req	x8
tmp2		.req	x9
tmp3		.req	x10
tmp4		.req	x11
zeroones	.req	x12
pos		.req	x13
limit_wd	.req	x14

#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

SYM_FUNC_START(__pi_strnlen)
	cbz	limit, .Lhit_limit	/* limit == 0: return 0 without loading. */
	mov	zeroones, #REP8_01
	bic	src, srcin, #15		/* src = srcin rounded down to 16 bytes. */
	ands	tmp1, srcin, #15	/* tmp1 = offset of srcin within its block. */
	b.ne	.Lmisaligned
	/* Calculate the number of full and partial Qwords - 1. */
	sub	limit_wd, limit, #1	/* Limit != 0, so no underflow. */
	lsr	limit_wd, limit_wd, #4	/* Convert to Qwords. */

	/*
	 * NUL detection works on the principle that (X - 1) & (~X) & 0x80
	 * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	 * can be done in parallel across the entire word.
	 */
	/*
	 * The inner loop deals with two Dwords at a time. This has a
	 * slightly higher start-up cost, but we should win quite quickly,
	 * especially on cores with a high number of issue slots per
	 * cycle, as we get much better parallelism out of the operations.
	 */
.Lloop:
	ldp	data1, data2, [src], #16
.Lrealigned:
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	sub	tmp3, data2, zeroones
	orr	tmp4, data2, #REP8_7f
	bic	has_nul1, tmp1, tmp2
	bic	has_nul2, tmp3, tmp4
	subs	limit_wd, limit_wd, #1	/* Goes negative once the limit is used up. */
	orr	tmp1, has_nul1, has_nul2
	/*
	 * While Qwords remain (PL), test the combined syndrome against zero;
	 * otherwise force NZCV = 0000 so that b.eq is not taken.
	 */
	ccmp	tmp1, #0, #0, pl	/* NZCV = 0000 */
	b.eq	.Lloop			/* No NUL yet and limit not reached. */

	cbz	tmp1, .Lhit_limit	/* No NUL in final Qword. */

	/*
	 * We know there's a NUL in the final Qword. The easiest thing
	 * to do now is work out the length of the string and return
	 * MIN (len, limit).
	 */
	sub	len, src, srcin		/* Bytes from srcin to the end of this Qword. */
	cbz	has_nul1, .Lnul_in_data2
CPU_BE(	mov	data2, data1	)	/* Prepare data to re-calculate the syndrome. */

	sub	len, len, #8		/* The NUL is in the first Dword. */
	mov	has_nul2, has_nul1
.Lnul_in_data2:
	/*
	 * For big-endian, carry propagation (if the final byte in the
	 * string is 0x01) means we cannot use has_nul directly. The
	 * easiest way to get the correct byte is to byte-swap the data
	 * and calculate the syndrome a second time.
	 */
CPU_BE(	rev	data2, data2	)
CPU_BE(	sub	tmp1, data2, zeroones	)
CPU_BE(	orr	tmp2, data2, #REP8_7f	)
CPU_BE(	bic	has_nul2, tmp1, tmp2	)

	sub	len, len, #8		/* len = offset of the Dword holding the NUL. */
	rev	has_nul2, has_nul2	/* Put the earliest string byte at the MSB. */
	clz	pos, has_nul2		/* Bit index of the first NUL marker. */
	add	len, len, pos, lsr #3	/* Bits to bytes. */
	cmp	len, limit
	csel	len, len, limit, ls	/* Return the lower value (unsigned). */
	ret

.Lmisaligned:
	/*
	 * Deal with a partial first word.
	 * We're doing two things in parallel here;
	 * 1) Calculate the number of words (but avoiding overflow if
	 * limit is near ULONG_MAX) - to do this we need to work out
	 * limit + tmp1 - 1 as a 65-bit value before shifting it;
	 * 2) Load and mask the initial data words - we force the bytes
	 * before the ones we are interested in to 0xff - this ensures
	 * early bytes will not hit any zero detection.
	 */
	ldp	data1, data2, [src], #16

	sub	limit_wd, limit, #1
	and	tmp3, limit_wd, #15	/* Low 4 bits of (limit - 1). */
	lsr	limit_wd, limit_wd, #4	/* High bits of (limit - 1), as Qwords. */

	add	tmp3, tmp3, tmp1	/* At most 15 + 15, so it cannot overflow. */
	add	limit_wd, limit_wd, tmp3, lsr #4	/* Add the carry out of the low bits. */

	neg	tmp4, tmp1
	lsl	tmp4, tmp4, #3		/* Bytes beyond alignment -> bits. */

	mov	tmp2, #~0
	/*
	 * Build a mask covering the bytes that precede srcin in a Dword.
	 * A register-specified shift only uses the count modulo 64.
	 */
	/* Big-endian. Early bytes are at MSB. */
CPU_BE(	lsl	tmp2, tmp2, tmp4	)	/* Shift (tmp4 & 63). */
	/* Little-endian. Early bytes are at LSB. */
CPU_LE(	lsr	tmp2, tmp2, tmp4	)	/* Shift (tmp4 & 63). */

	cmp	tmp1, #8		/* Does the string start in data1 or data2? */

	orr	data1, data1, tmp2
	orr	data2a, data2, tmp2

	/*
	 * tmp1 <= 8: the mask applies to data1 and data2 is used untouched.
	 * tmp1 >  8: all of data1 precedes the string, so it becomes all-ones,
	 *            and the mask applies to data2 instead.
	 */
	csinv	data1, data1, xzr, le
	csel	data2, data2, data2a, le
	b	.Lrealigned

.Lhit_limit:
	mov	len, limit		/* No NUL within limit bytes: return limit. */
	ret
SYM_FUNC_END(__pi_strnlen)

SYM_FUNC_ALIAS_WEAK(strnlen, __pi_strnlen)
EXPORT_SYMBOL_NOKASAN(strnlen)