18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * arch/openrisc/lib/memcpy.c 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Optimized memory copy routines for openrisc. These are mostly copied 68c2ecf20Sopenharmony_ci * from ohter sources but slightly entended based on ideas discuassed in 78c2ecf20Sopenharmony_ci * #openrisc. 88c2ecf20Sopenharmony_ci * 98c2ecf20Sopenharmony_ci * The word unroll implementation is an extension to the arm byte 108c2ecf20Sopenharmony_ci * unrolled implementation, but using word copies (if things are 118c2ecf20Sopenharmony_ci * properly aligned) 128c2ecf20Sopenharmony_ci * 138c2ecf20Sopenharmony_ci * The great arm loop unroll algorithm can be found at: 148c2ecf20Sopenharmony_ci * arch/arm/boot/compressed/string.c 158c2ecf20Sopenharmony_ci */ 168c2ecf20Sopenharmony_ci 178c2ecf20Sopenharmony_ci#include <linux/export.h> 188c2ecf20Sopenharmony_ci 198c2ecf20Sopenharmony_ci#include <linux/string.h> 208c2ecf20Sopenharmony_ci 218c2ecf20Sopenharmony_ci#ifdef CONFIG_OR1K_1200 228c2ecf20Sopenharmony_ci/* 238c2ecf20Sopenharmony_ci * Do memcpy with word copies and loop unrolling. This gives the 248c2ecf20Sopenharmony_ci * best performance on the OR1200 and MOR1KX archirectures 258c2ecf20Sopenharmony_ci */ 268c2ecf20Sopenharmony_civoid *memcpy(void *dest, __const void *src, __kernel_size_t n) 278c2ecf20Sopenharmony_ci{ 288c2ecf20Sopenharmony_ci int i = 0; 298c2ecf20Sopenharmony_ci unsigned char *d, *s; 308c2ecf20Sopenharmony_ci uint32_t *dest_w = (uint32_t *)dest, *src_w = (uint32_t *)src; 318c2ecf20Sopenharmony_ci 328c2ecf20Sopenharmony_ci /* If both source and dest are word aligned copy words */ 338c2ecf20Sopenharmony_ci if (!((unsigned int)dest_w & 3) && !((unsigned int)src_w & 3)) { 348c2ecf20Sopenharmony_ci /* Copy 32 bytes per loop */ 358c2ecf20Sopenharmony_ci for (i = n >> 5; i > 0; i--) { 368c2ecf20Sopenharmony_ci *dest_w++ = *src_w++; 378c2ecf20Sopenharmony_ci *dest_w++ = *src_w++; 388c2ecf20Sopenharmony_ci *dest_w++ = *src_w++; 398c2ecf20Sopenharmony_ci *dest_w++ = *src_w++; 408c2ecf20Sopenharmony_ci *dest_w++ = *src_w++; 418c2ecf20Sopenharmony_ci *dest_w++ = *src_w++; 428c2ecf20Sopenharmony_ci *dest_w++ = *src_w++; 438c2ecf20Sopenharmony_ci *dest_w++ = *src_w++; 448c2ecf20Sopenharmony_ci } 458c2ecf20Sopenharmony_ci 468c2ecf20Sopenharmony_ci if (n & 1 << 4) { 478c2ecf20Sopenharmony_ci *dest_w++ = *src_w++; 488c2ecf20Sopenharmony_ci *dest_w++ = *src_w++; 498c2ecf20Sopenharmony_ci *dest_w++ = *src_w++; 508c2ecf20Sopenharmony_ci *dest_w++ = *src_w++; 518c2ecf20Sopenharmony_ci } 528c2ecf20Sopenharmony_ci 538c2ecf20Sopenharmony_ci if (n & 1 << 3) { 548c2ecf20Sopenharmony_ci *dest_w++ = *src_w++; 558c2ecf20Sopenharmony_ci *dest_w++ = *src_w++; 568c2ecf20Sopenharmony_ci } 578c2ecf20Sopenharmony_ci 588c2ecf20Sopenharmony_ci if (n & 1 << 2) 598c2ecf20Sopenharmony_ci *dest_w++ = *src_w++; 608c2ecf20Sopenharmony_ci 618c2ecf20Sopenharmony_ci d = (unsigned char *)dest_w; 628c2ecf20Sopenharmony_ci s = (unsigned char *)src_w; 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_ci } else { 658c2ecf20Sopenharmony_ci d = (unsigned char *)dest_w; 668c2ecf20Sopenharmony_ci s = (unsigned char *)src_w; 678c2ecf20Sopenharmony_ci 688c2ecf20Sopenharmony_ci for (i = n >> 3; i > 0; i--) { 698c2ecf20Sopenharmony_ci *d++ = *s++; 708c2ecf20Sopenharmony_ci *d++ = *s++; 718c2ecf20Sopenharmony_ci *d++ = *s++; 728c2ecf20Sopenharmony_ci *d++ = *s++; 738c2ecf20Sopenharmony_ci *d++ = *s++; 748c2ecf20Sopenharmony_ci *d++ = *s++; 758c2ecf20Sopenharmony_ci *d++ = *s++; 768c2ecf20Sopenharmony_ci *d++ = *s++; 778c2ecf20Sopenharmony_ci } 788c2ecf20Sopenharmony_ci 798c2ecf20Sopenharmony_ci if (n & 1 << 2) { 808c2ecf20Sopenharmony_ci *d++ = *s++; 818c2ecf20Sopenharmony_ci *d++ = *s++; 828c2ecf20Sopenharmony_ci *d++ = *s++; 838c2ecf20Sopenharmony_ci *d++ = *s++; 848c2ecf20Sopenharmony_ci } 858c2ecf20Sopenharmony_ci } 868c2ecf20Sopenharmony_ci 878c2ecf20Sopenharmony_ci if (n & 1 << 1) { 888c2ecf20Sopenharmony_ci *d++ = *s++; 898c2ecf20Sopenharmony_ci *d++ = *s++; 908c2ecf20Sopenharmony_ci } 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_ci if (n & 1) 938c2ecf20Sopenharmony_ci *d++ = *s++; 948c2ecf20Sopenharmony_ci 958c2ecf20Sopenharmony_ci return dest; 968c2ecf20Sopenharmony_ci} 978c2ecf20Sopenharmony_ci#else 988c2ecf20Sopenharmony_ci/* 998c2ecf20Sopenharmony_ci * Use word copies but no loop unrolling as we cannot assume there 1008c2ecf20Sopenharmony_ci * will be benefits on the archirecture 1018c2ecf20Sopenharmony_ci */ 1028c2ecf20Sopenharmony_civoid *memcpy(void *dest, __const void *src, __kernel_size_t n) 1038c2ecf20Sopenharmony_ci{ 1048c2ecf20Sopenharmony_ci unsigned char *d = (unsigned char *)dest, *s = (unsigned char *)src; 1058c2ecf20Sopenharmony_ci uint32_t *dest_w = (uint32_t *)dest, *src_w = (uint32_t *)src; 1068c2ecf20Sopenharmony_ci 1078c2ecf20Sopenharmony_ci /* If both source and dest are word aligned copy words */ 1088c2ecf20Sopenharmony_ci if (!((unsigned int)dest_w & 3) && !((unsigned int)src_w & 3)) { 1098c2ecf20Sopenharmony_ci for (; n >= 4; n -= 4) 1108c2ecf20Sopenharmony_ci *dest_w++ = *src_w++; 1118c2ecf20Sopenharmony_ci } 1128c2ecf20Sopenharmony_ci 1138c2ecf20Sopenharmony_ci d = (unsigned char *)dest_w; 1148c2ecf20Sopenharmony_ci s = (unsigned char *)src_w; 1158c2ecf20Sopenharmony_ci 1168c2ecf20Sopenharmony_ci /* For remaining or if not aligned, copy bytes */ 1178c2ecf20Sopenharmony_ci for (; n >= 1; n -= 1) 1188c2ecf20Sopenharmony_ci *d++ = *s++; 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci return dest; 1218c2ecf20Sopenharmony_ci 1228c2ecf20Sopenharmony_ci} 1238c2ecf20Sopenharmony_ci#endif 1248c2ecf20Sopenharmony_ci 1258c2ecf20Sopenharmony_ciEXPORT_SYMBOL(memcpy); 126