/* Extracted from GLIBC memcpy.c and memcopy.h, which is:
   Copyright (C) 1991, 1992, 1993, 1997, 2004 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Torbjorn Granlund (tege@sics.se).

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <linux/types.h>

/* Type to use for aligned memory operations.
   This should normally be the biggest type supported by a single load
   and store.  */
#define op_t	unsigned long int
#define OPSIZ	(sizeof(op_t))

/* Optimal type for storing bytes in registers.  */
#define	reg_char	char

/* Build one destination word out of two consecutive aligned source words
   W0 (lower address) and W1 (higher address).  SH_1 is the bit offset of
   the wanted data inside W0 and SH_2 is the complementary shift, so
   SH_1 + SH_2 must equal the width of op_t and neither may be 0.
   Which word is shifted in which direction depends on the byte order.  */
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
	__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define MERGE(w0, sh_1, w1, sh_2) (((w0) << (sh_1)) | ((w1) >> (sh_2)))
#else
#define MERGE(w0, sh_1, w1, sh_2) (((w0) >> (sh_1)) | ((w1) << (sh_2)))
#endif

/* Copy exactly NBYTES bytes from SRC_BP to DST_BP,
   without any assumptions about alignment of the pointers.
   DST_BP and SRC_BP must be modifiable integer lvalues holding addresses;
   both are advanced past the copied bytes.  NBYTES is evaluated once.  */
#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes)				\
do {									\
	size_t __nbytes = (nbytes);					\
	while (__nbytes > 0) {						\
		unsigned char __x = ((unsigned char *) (src_bp))[0];	\
		(src_bp) += 1;						\
		__nbytes -= 1;						\
		((unsigned char *) (dst_bp))[0] = __x;			\
		(dst_bp) += 1;						\
	}								\
} while (0)

/* Copy *up to* NBYTES bytes from SRC_BP to DST_BP, with
   the assumption that DST_BP is aligned on an OPSIZ multiple.  If
   not all bytes could be easily copied, store remaining number of bytes
   in NBYTES_LEFT, otherwise store 0.
   DST_BP and SRC_BP must be modifiable integer lvalues holding addresses;
   both are advanced by the number of bytes copied as whole words.
   NBYTES is read once, before NBYTES_LEFT is written, so the same
   variable may be passed for both.  */
#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes)		\
do {									\
	size_t __wc_nbytes = (nbytes);					\
	if ((src_bp) % OPSIZ == 0)					\
		_wordcopy_fwd_aligned((dst_bp), (src_bp),		\
				      __wc_nbytes / OPSIZ);		\
	else								\
		_wordcopy_fwd_dest_aligned((dst_bp), (src_bp),		\
					   __wc_nbytes / OPSIZ);	\
	(src_bp) += __wc_nbytes & -OPSIZ;				\
	(dst_bp) += __wc_nbytes & -OPSIZ;				\
	(nbytes_left) = __wc_nbytes % OPSIZ;				\
} while (0)


/* Threshold value for when to enter the unrolled loops.  */
#define	OP_T_THRES	16

/* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
   block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
   Both SRCP and DSTP should be aligned for memory operations on `op_t's.  */
/* stream-lined (read x8 + write x8) */
static void _wordcopy_fwd_aligned(long int dstp, long int srcp, size_t len)
{
	/* Main loop: eight loads followed by eight stores per iteration.  */
	while (len > 7) {
		op_t a0, a1, a2, a3, a4, a5, a6, a7;

		a0 = ((op_t *) srcp)[0];
		a1 = ((op_t *) srcp)[1];
		a2 = ((op_t *) srcp)[2];
		a3 = ((op_t *) srcp)[3];
		a4 = ((op_t *) srcp)[4];
		a5 = ((op_t *) srcp)[5];
		a6 = ((op_t *) srcp)[6];
		a7 = ((op_t *) srcp)[7];
		((op_t *) dstp)[0] = a0;
		((op_t *) dstp)[1] = a1;
		((op_t *) dstp)[2] = a2;
		((op_t *) dstp)[3] = a3;
		((op_t *) dstp)[4] = a4;
		((op_t *) dstp)[5] = a5;
		((op_t *) dstp)[6] = a6;
		((op_t *) dstp)[7] = a7;

		srcp += 8 * OPSIZ;
		dstp += 8 * OPSIZ;
		len -= 8;
	}

	/* Remaining 0..7 words, one at a time.  */
	while (len > 0) {
		*(op_t *) dstp = *(op_t *) srcp;

		srcp += OPSIZ;
		dstp += OPSIZ;
		len -= 1;
	}
}

/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
   block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
   DSTP should be aligned for memory operations on `op_t's, but SRCP must
   *not* be aligned (an aligned SRCP would make SH_2 equal to the full
   width of op_t, which is not a valid shift count).  */
/* stream-lined (read x4 + write x4) */
static void _wordcopy_fwd_dest_aligned(long int dstp, long int srcp,
					size_t len)
{
	op_t ap;
	int sh_1, sh_2;

	/* Calculate how to shift a word read at the memory operation
	   aligned srcp to make it aligned for copy.  */

	sh_1 = 8 * (srcp % OPSIZ);
	sh_2 = 8 * OPSIZ - sh_1;

	/* Make SRCP aligned by rounding it down to the beginning of the `op_t'
	   it points in the middle of.  */
	srcp &= -OPSIZ;
	ap = ((op_t *) srcp)[0];
	srcp += OPSIZ;

	/* Main loop: four aligned loads, four merged stores.  AP always
	   carries the most recently loaded word into the next merge.  */
	while (len > 3) {
		op_t a0, a1, a2, a3;

		a0 = ((op_t *) srcp)[0];
		a1 = ((op_t *) srcp)[1];
		a2 = ((op_t *) srcp)[2];
		a3 = ((op_t *) srcp)[3];
		((op_t *) dstp)[0] = MERGE(ap, sh_1, a0, sh_2);
		((op_t *) dstp)[1] = MERGE(a0, sh_1, a1, sh_2);
		((op_t *) dstp)[2] = MERGE(a1, sh_1, a2, sh_2);
		((op_t *) dstp)[3] = MERGE(a2, sh_1, a3, sh_2);

		ap = a3;
		srcp += 4 * OPSIZ;
		dstp += 4 * OPSIZ;
		len -= 4;
	}

	/* Remaining 0..3 words, one at a time.  */
	while (len > 0) {
		op_t a0;

		a0 = ((op_t *) srcp)[0];
		((op_t *) dstp)[0] = MERGE(ap, sh_1, a0, sh_2);

		ap = a0;
		srcp += OPSIZ;
		dstp += OPSIZ;
		len -= 1;
	}
}

/* memcpy -- Copy LEN bytes from SRCPP to DSTPP and return DSTPP.
   The copy always runs from the lowest address to the highest; no
   attempt is made to handle overlapping regions.  */
void *memcpy(void *dstpp, const void *srcpp, size_t len)
{
	unsigned long int dstp = (long int) dstpp;
	unsigned long int srcp = (long int) srcpp;

	/* Copy from the beginning to the end.  */

	/* If there are not too few bytes to copy, use word copy.  */
	if (len >= OP_T_THRES) {
		/* Number of leading bytes needed to bring DSTP up to an
		   OPSIZ boundary (0 .. OPSIZ - 1).  */
		size_t head = (-dstp) % OPSIZ;

		/* Copy just a few bytes to make DSTP aligned.  */
		len -= head;
		BYTE_COPY_FWD(dstp, srcp, head);

		/* Copy from SRCP to DSTP taking advantage of the known
		   alignment of DSTP.  Number of bytes remaining is put in the
		   third argument, i.e. in LEN.  */

		WORD_COPY_FWD(dstp, srcp, len, len);

		/* Fall out and copy the tail.  */
	}

	/* There are just a few bytes to copy.  Use byte memory operations.  */
	BYTE_COPY_FWD(dstp, srcp, len);

	return dstpp;
}

/* memcpyb -- Copy LEN bytes from SRCPP to DSTPP strictly one byte at a
   time, lowest address first, and return DSTPP.  */
void *memcpyb(void *dstpp, const void *srcpp, unsigned len)
{
	unsigned long int dstp = (long int) dstpp;
	unsigned long int srcp = (long int) srcpp;

	BYTE_COPY_FWD(dstp, srcp, len);

	return dstpp;
}