/* Copyright (C) 1996 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by David Mosberger (davidm@cs.arizona.edu).

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with the GNU C Library; see the file COPYING.LIB.  If not,
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */

/* Finds characters in a memory area.  Optimized for the Alpha:

      - memory accessed as aligned quadwords only
      - uses cmpbge to compare 8 bytes in parallel
      - does binary search to find 0 byte in last
        quadword (HAKMEM needed 12 instructions to
        do this instead of the 9 instructions that
        binary search needs).

For correctness consider that:

      - only minimum number of quadwords may be accessed
      - the third argument is an unsigned long
*/

/* Register usage, as established by the code below:

      $16  in   start address of the area to search
      $17  in   character to find; only the low byte is used, and it is
                replicated into all eight byte lanes before the search
      $18  in   length in bytes; later reused as the address of the last
                quadword to access and as the count of trailing bytes
      $0   out  address of the first matching byte, or 0 if there is none
                (holds the address of the quadword being examined while
                the search is in progress)
      $5        $16 + cropped length, i.e. one past the last byte
      $3        all ones (mask source) until $found_it reuses it as scratch
      $1, $4    data quadwords (current and prefetched)
      $2        cmpbge result: bit i is set when byte lane i matched
      $6, $7    scratch

   The trailing "e0"/"e1" notes on each instruction are the original
   author's issue-slot annotations; the instruction order is deliberate
   (.set noreorder), so do not reorder statements when editing.
*/
#include <linux/export.h>
	.set noreorder
	.set noat

	.globl memchr
	.ent memchr
memchr:
	.frame $30,0,$26,0
	.prologue 0

	# Hack -- a caller may pass in (size_t)-1, hoping to just
	# search until the end of the address space, which would overflow
	# below when we find the address of the last byte.  Given
	# that we will never have a 56-bit address space, cropping
	# the length is the easiest way to avoid trouble.
	zap	$18, 0x80, $5	#-e0	: $5 = length with top byte cleared

	beq	$18, $not_found	# .. e1 : zero length: nothing to search
	ldq_u	$1, 0($16)	# e1	: load first quadword
	insbl	$17, 1, $2	# .. e0 : $2 = 000000000000ch00
	and	$17, 0xff, $17	#-e0    : $17 = 00000000000000ch
	cmpult	$18, 9, $4	# .. e1 : $4 = (length <= 8)
	or	$2, $17, $17	# e0    : $17 = 000000000000chch
	lda	$3, -1($31)	# .. e1 : $3 = all ones
	sll	$17, 16, $2	#-e0    : $2 = 00000000chch0000
	addq	$16, $5, $5	# .. e1 : $5 = one past the last byte
	or	$2, $17, $17	# e1    : $17 = 00000000chchchch
	unop			#	:
	sll	$17, 32, $2	#-e0    : $2 = chchchch00000000
	or	$2, $17, $17	# e1	: $17 = chchchchchchchch
	extql	$1, $16, $7	# e0    : low part of the quadword starting at $16
	beq	$4, $first_quad	# .. e1 : more than eight bytes to search

	ldq_u	$6, -1($5)	#-e1	: eight or fewer bytes to search
	extqh	$6, $16, $6	# .. e0 : high part, from the quad holding the last byte
	mov	$16, $0		# e0	: matches are reported relative to $16
	or	$7, $6, $1	# .. e1 : $1 = quadword starting at $16

	# Deal with the case where at most 8 bytes remain to be searched
	# in $1.  E.g.:
	#	$18 = 6
	#	$1 = ????c6c5c4c3c2c1
$last_quad:
	negq	$18, $6		#-e0	:
	xor	$17, $1, $1	# .. e1 : matching bytes become zero
	srl	$3, $6, $6	# e0    : $6 = mask of $18 bits set
	cmpbge	$31, $1, $2	# .. e1 : $2 = one bit per zero byte
	and	$2, $6, $2	#-e0	: ignore lanes past the end of the area
	beq	$2, $not_found	# .. e1 :

$found_it:
	# Now, determine which byte matched:
	# isolate the lowest set bit of $2, then binary-search its
	# position (0..7) and add that position to $0.
	negq	$2, $3		# e0	:
	and	$2, $3, $2	# e1	: $2 = lowest set bit only

	and	$2, 0x0f, $1	#-e0	:
	addq	$0, 4, $3	# .. e1 :
	cmoveq	$1, $3, $0	# e0	: bit is in lanes 4-7: add 4

	addq	$0, 2, $3	# .. e1 :
	and	$2, 0x33, $1	#-e0	:
	cmoveq	$1, $3, $0	# .. e1 : bit is in lanes 2-3 or 6-7: add 2

	and	$2, 0x55, $1	# e0	:
	addq	$0, 1, $3	# .. e1 :
	cmoveq	$1, $3, $0	#-e0	: bit is in an odd lane: add 1

$done:	ret			# .. e1 :

	# Deal with the case where $18 > 8 bytes remain to be
	# searched.  $16 may not be aligned.
	.align 4
$first_quad:
	andnot	$16, 0x7, $0	#-e1	: $0 = address of the aligned first quad
	insqh	$3, $16, $2	# .. e0	: $2 = ff in the $16<2:0> low bytes, 0 elsewhere
	xor	$1, $17, $1	# e0	: matching bytes become zero
	or	$1, $2, $1	# e1	: force the bytes before $16 to be non-zero
	cmpbge	$31, $1, $2	#-e0	:
	bne	$2, $found_it	# .. e1 :

	# At least one byte left to process.

	ldq	$1, 8($0)	# e0	:
	subq	$5, 1, $18	# .. e1 : $18 = address of the last byte
	addq	$0, 8, $0	#-e0	:

	# Make $18 point to last quad to be accessed (the
	# last quad may or may not be partial).

	andnot	$18, 0x7, $18	# .. e1 :
	cmpult	$0, $18, $2	# e0	:
	beq	$2, $final	# .. e1 : already at the last quad

	# At least two quads remain to be accessed.

	subq	$18, $0, $4	#-e0	: $4 <- 8 * (quads before the last one)
	and	$4, 8, $4	# e1	: odd number of quads?
	bne	$4, $odd_quad_count # e1 :

	# At least three quads remain to be accessed

	mov	$1, $4		# e0	: move prefetched value to correct reg

	.align	4
$unrolled_loop:
	ldq	$1, 8($0)	#-e0	: prefetch $1
	xor	$17, $4, $2	# .. e1 :
	cmpbge	$31, $2, $2	# e0	:
	bne	$2, $found_it	# .. e1 :

	addq	$0, 8, $0	#-e0	:
$odd_quad_count:
	xor	$17, $1, $2	# .. e1 :
	ldq	$4, 8($0)	# e0	: prefetch $4
	cmpbge	$31, $2, $2	# .. e1 :
	addq	$0, 8, $6	#-e0	:
	bne	$2, $found_it	# .. e1 :

	cmpult	$6, $18, $6	# e0	: more quads before the last one?
	addq	$0, 8, $0	# .. e1 :
	bne	$6, $unrolled_loop #-e1 :

	mov	$4, $1		# e0	: move prefetched value into $1
$final:	subq	$5, $0, $18	# .. e1	: $18 <- number of bytes left to do
	bne	$18, $last_quad	# e1	:

$not_found:
	mov	$31, $0		#-e0	: return a null pointer
	ret			# .. e1 :

	.end memchr
	EXPORT_SYMBOL(memchr)