/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 ARM Ltd.
 * Copyright (C) 2013 Linaro.
 *
 * This code is based on the glibc cortex-strings work originally authored
 * by Linaro, which can be found at:
 *
 * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
 * files/head:/src/aarch64/
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * strncmp: compare two strings, looking at no more than a given number
 * of bytes.
 *
 * Parameters:
 *	x0 - const string 1 pointer
 *	x1 - const string 2 pointer
 *	x2 - the maximal length to be compared
 * Returns:
 *	x0 - an integer less than, equal to, or greater than zero if s1 is
 *	found, respectively, to be less than, to match, or be greater than s2.
 *
 * Register usage:
 *	x0-x2 are overwritten, x3-x15 are used as scratch (see the .req
 *	aliases below) and the condition flags are modified.  The code below
 *	contains no calls and never references sp.
 *
 * Strategy:
 *	- limit == 0                        -> return 0 at once.
 *	- src1 and src2 share their low 3 address bits
 *	                                    -> compare aligned dwords
 *	                                       (.Lloop_aligned), after rounding
 *	                                       down and padding the first dword
 *	                                       if needed (.Lmutual_align).
 *	- otherwise (.Lmisaligned8)         -> compare byte-wise until one
 *	                                       pointer is aligned, then a
 *	                                       two-step dword loop
 *	                                       (.Lloopcmp_proc), then byte-wise
 *	                                       for the tail (.Lremain8).
 *
 * CPU_LE()/CPU_BE() are not defined in this file; presumably they come from
 * <asm/assembler.h> and emit their argument only on little-/big-endian
 * builds respectively -- confirm against that header.
 */

/* A byte value replicated into every byte of a 64-bit word. */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080	/* Not referenced by the code below. */

/* Parameters and result.  */
src1		.req	x0
src2		.req	x1
limit		.req	x2
result		.req	x0	/* Shares x0 with src1. */

/* Internal variables.  */
data1		.req	x3
data1w		.req	w3	/* 32-bit view of data1. */
data2		.req	x4
data2w		.req	w4	/* 32-bit view of data2. */
has_nul		.req	x5
diff		.req	x6
syndrome	.req	x7
tmp1		.req	x8
tmp2		.req	x9
tmp3		.req	x10
zeroones	.req	x11
pos		.req	x12
limit_wd	.req	x13
mask		.req	x14
endloop		.req	x15

SYM_FUNC_START_WEAK_PI(strncmp)
	cbz	limit, .Lret0		/* Nothing to compare. */
	eor	tmp1, src1, src2
	mov	zeroones, #REP8_01
	tst	tmp1, #7		/* Do the low 3 address bits agree? */
	b.ne	.Lmisaligned8
	ands	tmp1, src1, #7		/* tmp1 = bytes beyond the 8-byte boundary. */
	b.ne	.Lmutual_align
	/* Calculate the number of full and partial words -1.  */
	/*
	 * When limit is a multiple of 8, the count would be one dword too
	 * large without the subtraction of 1, and the last-dword test below
	 * would be wrong.
	 */
	sub	limit_wd, limit, #1	/* limit != 0, so no underflow.  */
	lsr	limit_wd, limit_wd, #3	/* Convert to Dwords.  */

	/*
	 * NUL detection works on the principle that (X - 1) & (~X) & 0x80
	 * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	 * can be done in parallel across the entire word.
	 */
.Lloop_aligned:
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
.Lstart_realigned:
	/*
	 * The flags are chained here: "subs" sets N once the dword count
	 * goes negative, "csinv" consumes that, "bics" then sets Z when no
	 * NUL was seen, and "ccmp" consumes that.  The loop continues only
	 * when there is no NUL, no difference and the count is not exhausted.
	 */
	subs	limit_wd, limit_wd, #1
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	csinv	endloop, diff, xzr, pl	/* Last Dword or differences.*/
	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
	ccmp	endloop, #0, #0, eq	/* NUL seen => NZCV = 0, i.e. "ne". */
	b.eq	.Lloop_aligned

	/* Not reached the limit, must have found the end or a diff.  */
	tbz	limit_wd, #63, .Lnot_limit	/* Count still non-negative. */

	/* Limit % 8 == 0 => all bytes significant.  */
	ands	limit, limit, #7
	b.eq	.Lnot_limit

	/*
	 * Only (limit % 8) bytes of the last dword are significant.  Build
	 * a mask covering the bytes at and beyond the limit and clear them
	 * in both data words.
	 */
	lsl	limit, limit, #3	/* Bytes -> bits.  */
	mov	mask, #~0
CPU_BE( lsr	mask, mask, limit )
CPU_LE( lsl	mask, mask, limit )
	bic	data1, data1, mask
	bic	data2, data2, mask

	/* Make sure that the NUL byte is marked in the syndrome.  */
	orr	has_nul, has_nul, mask

.Lnot_limit:
	orr	syndrome, diff, has_nul
	b	.Lcal_cmpresult

.Lmutual_align:
	/*
	 * Sources are mutually aligned, but are not currently at an
	 * alignment boundary.  Round down the addresses and then mask off
	 * the bytes that precede the start point.
	 * We also need to adjust the limit calculations, but without
	 * overflowing if the limit is near ULONG_MAX.
	 */
	bic	src1, src1, #7
	bic	src2, src2, #7
	ldr	data1, [src1], #8
	neg	tmp3, tmp1, lsl #3	/* 64 - bits(bytes beyond align). */
	ldr	data2, [src2], #8
	mov	tmp2, #~0
	sub	limit_wd, limit, #1	/* limit != 0, so no underflow.  */
	/* Big-endian.  Early bytes are at MSB.  */
CPU_BE( lsl	tmp2, tmp2, tmp3 )	/* Shift (tmp1 & 63).  */
	/* Little-endian.  Early bytes are at LSB.  */
CPU_LE( lsr	tmp2, tmp2, tmp3 )	/* Shift (tmp1 & 63).  */

	and	tmp3, limit_wd, #7	/* tmp3 = (limit - 1) % 8. */
	lsr	limit_wd, limit_wd, #3	/* limit_wd = (limit - 1) / 8. */
	/* Adjust the limit. Only low 3 bits used, so overflow irrelevant.*/
	add	limit, limit, tmp1
	add	tmp3, tmp3, tmp1
	/*
	 * Force the bytes that precede the start point to 0xff in both
	 * words, so that they compare equal and are never seen as NUL.
	 */
	orr	data1, data1, tmp2
	orr	data2, data2, tmp2
	/* Carry the leading pad bytes into the dword count. */
	add	limit_wd, limit_wd, tmp3, lsr #3
	b	.Lstart_realigned

/* When the src1 offset is not equal to the src2 offset... */
.Lmisaligned8:
	cmp	limit, #8
	b.lo	.Ltiny8proc	/* limit < 8... */
	/*
	 * Get the align offset length to compare per byte first.
	 * After this process, one string's address will be aligned.
	 */
	and	tmp1, src1, #7
	neg	tmp1, tmp1
	add	tmp1, tmp1, #8		/* tmp1 = 8 - (src1 % 8), in 1..8. */
	and	tmp2, src2, #7
	neg	tmp2, tmp2
	add	tmp2, tmp2, #8		/* tmp2 = 8 - (src2 % 8), in 1..8. */
	subs	tmp3, tmp1, tmp2	/* Signed distance; reused below. */
	csel	pos, tmp1, tmp2, hi	/* Choose the maximum.  */
	/*
	 * Here, limit is not less than 8, so directly run .Ltinycmp
	 * without checking the limit.
	 */
	sub	limit, limit, pos
.Ltinycmp:
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	subs	pos, pos, #1
	/*
	 * Continue only while bytes remain (pos != 0), the src1 byte is
	 * non-NUL (data1w >= 1 sets carry) and both bytes are equal.
	 */
	ccmp	data1w, #1, #0, ne	/* NZCV = 0b0000.  */
	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
	b.eq	.Ltinycmp
	cbnz	pos, 1f			/* Found the NUL or a difference... */
	/* pos reached 0: the final byte has not been judged yet. */
	cmp	data1w, #1
	ccmp	data1w, data2w, #0, cs
	b.eq	.Lstart_align		/* The last bytes are equal... */
1:
	sub	result, data1, data2	/* Both were zero-extended by ldrb. */
	ret

.Lstart_align:
	lsr	limit_wd, limit, #3
	cbz	limit_wd, .Lremain8	/* Fewer than 8 bytes left. */
	/* Process more leading bytes to make src1 aligned... */
	ands	xzr, src1, #7
	b.eq	.Lrecal_offset
	/*
	 * src1 is still unaligned here, which means tmp1 < tmp2 above and
	 * therefore tmp3 (= tmp1 - tmp2) is negative: both pointers step
	 * backwards so that src1 lands on an 8-byte boundary.  The bytes
	 * that are re-read were already found equal and non-NUL.
	 */
	add	src1, src1, tmp3
	add	src2, src2, tmp3
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8

	sub	limit, limit, tmp3	/* Give back the re-read bytes. */
	lsr	limit_wd, limit, #3
	subs	limit_wd, limit_wd, #1

	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	csinv	endloop, diff, xzr, ne	/* If limit_wd is 0, finish the cmp. */
	bics	has_nul, tmp1, tmp2
	ccmp	endloop, #0, #0, eq	/* has_nul is zero: no NUL byte. */
	b.ne	.Lunequal_proc
	/* How far is the current src2 from the alignment boundary... */
	and	tmp3, tmp3, #7
.Lrecal_offset:
	neg	pos, tmp3		/* Negative offset back to src2's boundary. */
.Lloopcmp_proc:
	/*
	 * Divide the eight bytes into two parts.  First, step src2 back
	 * to an alignment boundary, load eight bytes from that boundary,
	 * then compare with the corresponding bytes from src1.
	 * If all 8 bytes are equal, then start the second part's comparison.
	 * Otherwise finish the comparison.
	 * This special handling is intended to guarantee that every access
	 * stays inside the thread/task space and avoids out-of-range
	 * accesses.
	 */
	ldr	data1, [src1,pos]
	ldr	data2, [src2,pos]
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	/* The flags still come from "bics": eor does not set them. */
	csinv	endloop, diff, xzr, eq	/* No NUL ? diff : all ones. */
	cbnz	endloop, .Lunequal_proc

	/* The second part: the next eight bytes starting at src1. */
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
	subs	limit_wd, limit_wd, #1
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	csinv	endloop, diff, xzr, ne	/* If limit_wd is 0, finish the cmp. */
	bics	has_nul, tmp1, tmp2
	ccmp	endloop, #0, #0, eq	/* has_nul is zero: no NUL byte. */
	b.eq	.Lloopcmp_proc

.Lunequal_proc:
	orr	syndrome, diff, has_nul
	/* No difference and no NUL: only the dword count ran out. */
	cbz	syndrome, .Lremain8
.Lcal_cmpresult:
	/*
	 * On little-endian, reverse the byte order so that the earliest
	 * string byte sits in the most significant position; CLZ on the
	 * syndrome can then locate the first difference or NUL.
	 */
CPU_LE( rev	syndrome, syndrome )
CPU_LE( rev	data1, data1 )
CPU_LE( rev	data2, data2 )
	/*
	 * For big-endian we cannot use the trick with the syndrome value
	 * as carry-propagation can corrupt the upper bits if the trailing
	 * bytes in the string contain 0x01.
	 * However, if there is no NUL byte in the dword, we can generate
	 * the result directly.  We can't just subtract the bytes as the
	 * MSB might be significant.
	 */
CPU_BE( cbnz	has_nul, 1f )
CPU_BE( cmp	data1, data2 )
CPU_BE( cset	result, ne )
CPU_BE( cneg	result, result, lo )
CPU_BE( ret )
CPU_BE( 1: )
	/* Re-compute the NUL-byte detection, using a byte-reversed value.*/
CPU_BE( rev	tmp3, data1 )
CPU_BE( sub	tmp1, tmp3, zeroones )
CPU_BE( orr	tmp2, tmp3, #REP8_7f )
CPU_BE( bic	has_nul, tmp1, tmp2 )
CPU_BE( rev	has_nul, has_nul )
CPU_BE( orr	syndrome, diff, has_nul )
	/*
	 * The MS-non-zero bit of the syndrome marks either the first bit
	 * that is different, or the top bit of the first zero byte.
	 * Shifting left now will bring the critical information into the
	 * top bits.
	 */
	clz	pos, syndrome
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/*
	 * But we need to zero-extend (char is unsigned) the value and then
	 * perform a signed 32-bit subtraction.
	 */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret

.Lremain8:
	/* Limit % 8 == 0 => all bytes significant.  */
	ands	limit, limit, #7
	b.eq	.Lret0
.Ltiny8proc:
	/* Byte-wise compare of the remaining (fewer than 8) bytes. */
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	subs	limit, limit, #1

	/*
	 * Continue only while bytes remain (limit != 0), the src1 byte is
	 * non-NUL and both bytes are equal.
	 */
	ccmp	data1w, #1, #0, ne	/* NZCV = 0b0000.  */
	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
	b.eq	.Ltiny8proc
	sub	result, data1, data2	/* Zero when the last bytes matched. */
	ret

.Lret0:
	mov	result, #0
	ret
SYM_FUNC_END_PI(strncmp)
EXPORT_SYMBOL_NOKASAN(strncmp)