/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 ARM Ltd.
 * Copyright (C) 2013 Linaro.
 *
 * This code is based on glibc cortex strings work originally authored by
 * Linaro. The original code can be found @
 *
 * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
 * files/head:/src/aarch64/
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * Determine the length of a string, examining at most a fixed number of
 * bytes.
 *
 * Parameters:
 *	x0 - const string pointer
 *	x1 - maximal string length
 * Returns:
 *	x0 - the length of the string, or x1 if no NUL byte was found
 *	     within the first x1 bytes
 *
 * Uses x2-x14 as scratch registers and modifies the condition flags.
 *
 * Memory is always read in 16-byte blocks starting at the string pointer
 * rounded down to a 16-byte boundary, so bytes that share a block with
 * the string but lie before its start or beyond the limit are loaded too.
 * Bytes before the start are masked out, and a NUL found beyond the limit
 * is discarded by the final MIN (len, limit).
 */

/* Arguments and results. */
srcin		.req	x0
len		.req	x0
limit		.req	x1

/* Locals and temporaries. */
src		.req	x2
data1		.req	x3
data2		.req	x4
data2a		.req	x5
has_nul1	.req	x6
has_nul2	.req	x7
tmp1		.req	x8
tmp2		.req	x9
tmp3		.req	x10
tmp4		.req	x11
zeroones	.req	x12
pos		.req	x13
limit_wd	.req	x14

#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

SYM_FUNC_START_WEAK_PI(strnlen)
	cbz	limit, .Lhit_limit
	mov	zeroones, #REP8_01
	bic	src, srcin, #15
	ands	tmp1, srcin, #15	/* tmp1 = offset within the Qword. */
	b.ne	.Lmisaligned
	/* Calculate the number of full and partial Qwords -1. */
	sub	limit_wd, limit, #1	/* Limit != 0, so no underflow. */
	lsr	limit_wd, limit_wd, #4	/* Convert to Qwords. */

	/*
	 * NUL detection works on the principle that (X - 1) & (~X) & 0x80
	 * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	 * can be done in parallel across the entire word.
	 */
	/*
	 * The inner loop deals with two Dwords at a time. This has a
	 * slightly higher start-up cost, but we should win quite quickly,
	 * especially on cores with a high number of issue slots per
	 * cycle, as we get much better parallelism out of the operations.
	 */
.Lloop:
	ldp	data1, data2, [src], #16
.Lrealigned:
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	sub	tmp3, data2, zeroones
	orr	tmp4, data2, #REP8_7f
	bic	has_nul1, tmp1, tmp2
	bic	has_nul2, tmp3, tmp4
	subs	limit_wd, limit_wd, #1
	orr	tmp1, has_nul1, has_nul2
	/*
	 * Keep looping only while Qwords remain (pl) and no NUL was seen
	 * (tmp1 == 0); once the count goes negative the flags are forced
	 * to "ne" so the loop exits.
	 */
	ccmp	tmp1, #0, #0, pl	/* NZCV = 0000 */
	b.eq	.Lloop

	cbz	tmp1, .Lhit_limit	/* No null in final Qword. */

	/*
	 * We know there's a null in the final Qword. The easiest thing
	 * to do now is work out the length of the string and return
	 * MIN (len, limit).
	 */
	sub	len, src, srcin
	cbz	has_nul1, .Lnul_in_data2
CPU_BE( mov	data2, data1 )	/* Prepare data to re-calculate the syndrome. */

	sub	len, len, #8
	mov	has_nul2, has_nul1
.Lnul_in_data2:
	/*
	 * For big-endian, carry propagation (if the final byte in the
	 * string is 0x01) means we cannot use has_nul directly. The
	 * easiest way to get the correct byte is to byte-swap the data
	 * and calculate the syndrome a second time.
	 */
CPU_BE( rev	data2, data2 )
CPU_BE( sub	tmp1, data2, zeroones )
CPU_BE( orr	tmp2, data2, #REP8_7f )
CPU_BE( bic	has_nul2, tmp1, tmp2 )

	sub	len, len, #8
	rev	has_nul2, has_nul2
	clz	pos, has_nul2
	add	len, len, pos, lsr #3	/* Bits to bytes. */
	cmp	len, limit
	csel	len, len, limit, ls	/* Return the lower value. */
	ret

.Lmisaligned:
	/*
	 * Deal with a partial first word.
	 * We're doing two things in parallel here;
	 * 1) Calculate the number of words (but avoiding overflow if
	 * limit is near ULONG_MAX) - to do this we need to work out
	 * limit + tmp1 - 1 as a 65-bit value before shifting it;
	 * 2) Load and mask the initial data words - we force the bytes
	 * before the ones we are interested in to 0xff - this ensures
	 * early bytes will not hit any zero detection.
	 */
	ldp	data1, data2, [src], #16

	sub	limit_wd, limit, #1
	and	tmp3, limit_wd, #15
	lsr	limit_wd, limit_wd, #4

	add	tmp3, tmp3, tmp1
	add	limit_wd, limit_wd, tmp3, lsr #4

	neg	tmp4, tmp1
	lsl	tmp4, tmp4, #3	/* Bytes beyond alignment -> bits. */

	mov	tmp2, #~0
	/* Big-endian. Early bytes are at MSB. */
CPU_BE( lsl	tmp2, tmp2, tmp4 )	/* Shift amount is (tmp4 & 63). */
	/* Little-endian. Early bytes are at LSB. */
CPU_LE( lsr	tmp2, tmp2, tmp4 )	/* Shift amount is (tmp4 & 63). */

	/*
	 * tmp1 <= 8: the string starts in data1, so mask data1 only.
	 * tmp1 >  8: data1 is entirely before the string (force it to
	 * all ones) and the mask applies to data2 instead.
	 */
	cmp	tmp1, #8

	orr	data1, data1, tmp2
	orr	data2a, data2, tmp2

	csinv	data1, data1, xzr, le
	csel	data2, data2, data2a, le
	b	.Lrealigned

.Lhit_limit:
	mov	len, limit
	ret
SYM_FUNC_END_PI(strnlen)
EXPORT_SYMBOL_NOKASAN(strnlen)