/* SPDX-License-Identifier: GPL-2.0 */
/*
 * strlen() for PPC32
 *
 * Copyright (C) 2018 Christophe Leroy CS Systemes d'Information.
 *
 * Inspired by the glibc implementation
 */
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/cache.h>

	.text

/*
 * Algorithm:
 *
 * 1) Given a word 'x', we can test to see if it contains any 0 bytes
 *    by subtracting 0x01010101, and seeing if any of the high bits of each
 *    byte changed from 0 to 1. This works because the least significant
 *    0 byte must have had no incoming carry (otherwise it's not the least
 *    significant), so it is 0x00 - 0x01 == 0xff. For all other
 *    byte values, either they have the high bit set initially, or when
 *    1 is subtracted you get a value in the range 0x00-0x7f, none of which
 *    have their high bit set. The expression here is
 *    ((x - 0x01010101) & ~x & 0x80808080), which gives 0x00000000 when
 *    there were no 0x00 bytes in the word. You get 0x80 in bytes that
 *    match, but possibly false 0x80 matches in the next more significant
 *    byte to a true match due to carries. For little-endian this is
 *    of no consequence since the least significant match is the one
 *    we're interested in, but big-endian needs method 2 to find which
 *    byte matches.
 * 2) Given a word 'x', we can test to see _which_ byte was zero by
 *    calculating ~(((x & ~0x80808080) - 0x80808080 - 1) | x | ~0x80808080).
 *    This produces 0x80 in each byte that was zero, and 0x00 in all
 *    the other bytes. The '| ~0x80808080' clears the low 7 bits in each
 *    byte, and the '| x' part ensures that bytes with the high bit set
 *    produce 0x00. The addition will carry into the high bit of each byte
 *    iff that byte had one of its low 7 bits set. We can then just see
 *    which was the most significant bit set and divide by 8 to find how
 *    many to add to the index.
 *    This is from the book 'The PowerPC Compiler Writer's Guide',
 *    by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
 */

/*
 * strlen: length of a NUL-terminated string.
 *
 * In:       r3 = address of the string (any alignment)
 * Out:      r3 = number of bytes before the first 0x00 byte
 * Clobbers: r0, r6, r7, r8, r9, r10, cr0, XER[CA]
 *
 * Register roles:
 *   r0  = r3 & 3, then that offset converted to a bit count (unaligned path)
 *   r6  = 0x80808080 (himagic)
 *   r7  = 0x01010101 (lomagic)
 *   r8  = scratch for the zero-byte tests
 *   r9  = word currently being examined
 *   r10 = address of the word in r9 (updated by lwzu)
 *
 * The string is always read one naturally aligned word at a time, so bytes
 * that share an aligned word with the start or the end of the string are
 * loaded too, but no access ever goes outside those aligned words.
 *
 * XER[CA] is cleared once by the addic below and must stay clear until the
 * subfe in the tail; none of the instructions in between modify it.
 */
_GLOBAL(strlen)
	andi.	r0, r3, 3	/* r0 = byte offset within the word, sets cr0 */
	lis	r7, 0x0101	/* r7 = 0x01010000 */
	addi	r10, r3, -4	/* bias by -4: lwzu adds 4 before loading */
	addic	r7, r7, 0x0101	/* r7 = 0x01010101 (lomagic) & clear XER[CA] */
	rotlwi	r6, r7, 31	/* r6 = 0x80808080 (himagic) */
	bne-	3f		/* cr0 from andi.: start is not word aligned */
	.balign IFETCH_ALIGN_BYTES
	/* Main loop: method 1, evaluated in two steps with an early exit each */
1:	lwzu	r9, 4(r10)	/* r10 += 4; r9 = word at r10 */
2:	subf	r8, r7, r9	/* r8 = x - lomagic */
	and.	r8, r8, r6	/* keep only the high bit of each byte */
	beq+	1b		/* no high bit set: no 0 byte in this word */
	andc.	r8, r8, r9	/* drop bytes whose high bit was already set in x */
	beq+	1b		/* nothing left: no 0 byte in this word */

	/* A 0 byte is present: method 2 locates it (needs XER[CA] == 0) */
	andc	r8, r9, r6	/* r8 = x & ~himagic */
	orc	r9, r9, r6	/* r9 = x | ~himagic */
	subfe	r8, r6, r8	/* r8 = r8 + ~himagic + CA = (x & ~himagic) - himagic - 1 */
	nor	r8, r8, r9	/* r8 = 0x80 in each byte of x that was 0, else 0x00 */
	cntlzw	r8, r8		/* bit position of the most significant 0x80 */
	subf	r3, r3, r10	/* r3 = offset of the current word from the start */
	srwi	r8, r8, 3	/* bit position / 8 = byte index inside the word */
	add	r3, r3, r8	/* length = word offset + byte index */
	blr

	/* Misaligned string: make sure bytes before string are seen not 0 */
3:	xor	r10, r10, r0	/* clear the two low address bits: word align r10 */
	orc	r8, r8, r8	/* r8 = r8 | ~r8 = 0xffffffff */
	lwzu	r9, 4(r10)	/* r9 = aligned word that holds the first byte */
	slwi	r0, r0, 3	/* byte offset -> bit count */
	srw	r8, r8, r0	/* r8 = ones over the bytes that belong to the string */
	orc	r9, r9, r8	/* force the bytes ahead of the string to 0xff */
	b	2b		/* join the main loop with the patched word */
EXPORT_SYMBOL(strlen)