/* SPDX-License-Identifier: GPL-2.0 */
/*
 * strlen.S (c) 1995 David Mosberger (davidm@cs.arizona.edu)
 *
 * Finds length of a 0-terminated string.  Optimized for the
 * Alpha architecture:
 *
 *	- memory accessed as aligned quadwords only
 *	- uses cmpbge to compare 8 bytes in parallel
 *	- does binary search to find 0 byte in last
 *	  quadword (HAKMEM needed 12 instructions to
 *	  do this instead of the 9 instructions that
 *	  binary search needs).
 *
 * Register usage:
 *
 *	$16	in:  pointer to the string (any alignment)
 *	$0	out: length of the string; until the final subtraction
 *		     it holds the address currently being examined
 *	$1-$3	scratch
 *	$26	return address
 *	$31	reads as zero
 *
 * Because only whole aligned quadwords are loaded, the routine may
 * read (but never uses) bytes that lie before the start of the string
 * or after its terminator, always within the same aligned quadword as
 * a byte of the string.
 */
#include <asm/export.h>
	.set noreorder
	.set noat

	.align 3

	.globl	strlen
	.ent	strlen

strlen:
	ldq_u	$1, 0($16)	# load first quadword ($16 may be misaligned)
	lda	$2, -1($31)	# $2 <- all ones
	insqh	$2, $16, $2	# $2 <- 0xff in each byte below the string start
	andnot	$16, 7, $0	# $0 <- $16 rounded down to a quadword boundary
	or	$2, $1, $1	# make the bytes before the string non-zero
	cmpbge	$31, $1, $2	# $2 <- bitmask: bit i == 1 <==> i-th byte == 0
	bne	$2, found	# terminator is inside the first quadword

	/* Scan one aligned quadword per iteration until one holds a 0 byte. */
loop:	ldq	$1, 8($0)
	addq	$0, 8, $0	# addr += 8
	nop			# helps dual issue last two insns
	cmpbge	$31, $1, $2
	beq	$2, loop

	/*
	 * Here $0 is the address of the quadword holding the terminator
	 * and $2 has one bit set for every zero byte in that quadword.
	 */
found:	blbs	$2, done	# make aligned case fast
	negq	$2, $3
	and	$2, $3, $2	# keep only the lowest set bit (first 0 byte)

	/*
	 * Binary search for the position of the remaining bit: each step
	 * advances $0 when the bit is not in the half being tested.
	 */
	and	$2, 0x0f, $1	# bit in positions 0-3?
	addq	$0, 4, $3
	cmoveq	$1, $3, $0	# no: first 0 byte is at offset 4 or above

	and	$2, 0x33, $1	# bit in positions 0,1,4,5?
	addq	$0, 2, $3
	cmoveq	$1, $3, $0	# no: add 2 to the offset

	and	$2, 0x55, $1	# bit in an even position?
	addq	$0, 1, $3
	cmoveq	$1, $3, $0	# no: add 1 to the offset

done:	subq	$0, $16, $0	# length = address of terminator - start
	ret	$31, ($26)

	.end	strlen
	EXPORT_SYMBOL(strlen)