18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu> 38c2ecf20Sopenharmony_ci * Copyright (C) 2008-2009 PetaLogix 48c2ecf20Sopenharmony_ci * Copyright (C) 2007 John Williams 58c2ecf20Sopenharmony_ci * 68c2ecf20Sopenharmony_ci * Reasonably optimised generic C-code for memcpy on Microblaze 78c2ecf20Sopenharmony_ci * This is generic C code to do efficient, alignment-aware memcpy. 88c2ecf20Sopenharmony_ci * 98c2ecf20Sopenharmony_ci * It is based on demo code originally Copyright 2001 by Intel Corp, taken from 108c2ecf20Sopenharmony_ci * http://www.embedded.com/showArticle.jhtml?articleID=19205567 118c2ecf20Sopenharmony_ci * 128c2ecf20Sopenharmony_ci * Attempts were made, unsuccessfully, to contact the original 138c2ecf20Sopenharmony_ci * author of this code (Michael Morrow, Intel). Below is the original 148c2ecf20Sopenharmony_ci * copyright notice. 158c2ecf20Sopenharmony_ci * 168c2ecf20Sopenharmony_ci * This software has been developed by Intel Corporation. 178c2ecf20Sopenharmony_ci * Intel specifically disclaims all warranties, express or 188c2ecf20Sopenharmony_ci * implied, and all liability, including consequential and 198c2ecf20Sopenharmony_ci * other indirect damages, for the use of this program, including 208c2ecf20Sopenharmony_ci * liability for infringement of any proprietary rights, 218c2ecf20Sopenharmony_ci * and including the warranties of merchantability and fitness 228c2ecf20Sopenharmony_ci * for a particular purpose. Intel does not assume any 238c2ecf20Sopenharmony_ci * responsibility for and errors which may appear in this program 248c2ecf20Sopenharmony_ci * not any responsibility to update it. 258c2ecf20Sopenharmony_ci */ 268c2ecf20Sopenharmony_ci 278c2ecf20Sopenharmony_ci#include <linux/export.h> 288c2ecf20Sopenharmony_ci#include <linux/types.h> 298c2ecf20Sopenharmony_ci#include <linux/stddef.h> 308c2ecf20Sopenharmony_ci#include <linux/compiler.h> 318c2ecf20Sopenharmony_ci 328c2ecf20Sopenharmony_ci#include <linux/string.h> 338c2ecf20Sopenharmony_ci 348c2ecf20Sopenharmony_ci#ifdef __HAVE_ARCH_MEMCPY 358c2ecf20Sopenharmony_ci#ifndef CONFIG_OPT_LIB_FUNCTION 368c2ecf20Sopenharmony_civoid *memcpy(void *v_dst, const void *v_src, __kernel_size_t c) 378c2ecf20Sopenharmony_ci{ 388c2ecf20Sopenharmony_ci const char *src = v_src; 398c2ecf20Sopenharmony_ci char *dst = v_dst; 408c2ecf20Sopenharmony_ci 418c2ecf20Sopenharmony_ci /* Simple, byte oriented memcpy. */ 428c2ecf20Sopenharmony_ci while (c--) 438c2ecf20Sopenharmony_ci *dst++ = *src++; 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_ci return v_dst; 468c2ecf20Sopenharmony_ci} 478c2ecf20Sopenharmony_ci#else /* CONFIG_OPT_LIB_FUNCTION */ 488c2ecf20Sopenharmony_civoid *memcpy(void *v_dst, const void *v_src, __kernel_size_t c) 498c2ecf20Sopenharmony_ci{ 508c2ecf20Sopenharmony_ci const char *src = v_src; 518c2ecf20Sopenharmony_ci char *dst = v_dst; 528c2ecf20Sopenharmony_ci 538c2ecf20Sopenharmony_ci /* The following code tries to optimize the copy by using unsigned 548c2ecf20Sopenharmony_ci * alignment. This will work fine if both source and destination are 558c2ecf20Sopenharmony_ci * aligned on the same boundary. However, if they are aligned on 568c2ecf20Sopenharmony_ci * different boundaries shifts will be necessary. This might result in 578c2ecf20Sopenharmony_ci * bad performance on MicroBlaze systems without a barrel shifter. 588c2ecf20Sopenharmony_ci */ 598c2ecf20Sopenharmony_ci const uint32_t *i_src; 608c2ecf20Sopenharmony_ci uint32_t *i_dst; 618c2ecf20Sopenharmony_ci 628c2ecf20Sopenharmony_ci if (likely(c >= 4)) { 638c2ecf20Sopenharmony_ci unsigned value, buf_hold; 648c2ecf20Sopenharmony_ci 658c2ecf20Sopenharmony_ci /* Align the destination to a word boundary. */ 668c2ecf20Sopenharmony_ci /* This is done in an endian independent manner. */ 678c2ecf20Sopenharmony_ci switch ((unsigned long)dst & 3) { 688c2ecf20Sopenharmony_ci case 1: 698c2ecf20Sopenharmony_ci *dst++ = *src++; 708c2ecf20Sopenharmony_ci --c; 718c2ecf20Sopenharmony_ci case 2: 728c2ecf20Sopenharmony_ci *dst++ = *src++; 738c2ecf20Sopenharmony_ci --c; 748c2ecf20Sopenharmony_ci case 3: 758c2ecf20Sopenharmony_ci *dst++ = *src++; 768c2ecf20Sopenharmony_ci --c; 778c2ecf20Sopenharmony_ci } 788c2ecf20Sopenharmony_ci 798c2ecf20Sopenharmony_ci i_dst = (void *)dst; 808c2ecf20Sopenharmony_ci 818c2ecf20Sopenharmony_ci /* Choose a copy scheme based on the source */ 828c2ecf20Sopenharmony_ci /* alignment relative to destination. */ 838c2ecf20Sopenharmony_ci switch ((unsigned long)src & 3) { 848c2ecf20Sopenharmony_ci case 0x0: /* Both byte offsets are aligned */ 858c2ecf20Sopenharmony_ci i_src = (const void *)src; 868c2ecf20Sopenharmony_ci 878c2ecf20Sopenharmony_ci for (; c >= 4; c -= 4) 888c2ecf20Sopenharmony_ci *i_dst++ = *i_src++; 898c2ecf20Sopenharmony_ci 908c2ecf20Sopenharmony_ci src = (const void *)i_src; 918c2ecf20Sopenharmony_ci break; 928c2ecf20Sopenharmony_ci case 0x1: /* Unaligned - Off by 1 */ 938c2ecf20Sopenharmony_ci /* Word align the source */ 948c2ecf20Sopenharmony_ci i_src = (const void *) ((unsigned)src & ~3); 958c2ecf20Sopenharmony_ci#ifndef __MICROBLAZEEL__ 968c2ecf20Sopenharmony_ci /* Load the holding buffer */ 978c2ecf20Sopenharmony_ci buf_hold = *i_src++ << 8; 988c2ecf20Sopenharmony_ci 998c2ecf20Sopenharmony_ci for (; c >= 4; c -= 4) { 1008c2ecf20Sopenharmony_ci value = *i_src++; 1018c2ecf20Sopenharmony_ci *i_dst++ = buf_hold | value >> 24; 1028c2ecf20Sopenharmony_ci buf_hold = value << 8; 1038c2ecf20Sopenharmony_ci } 1048c2ecf20Sopenharmony_ci#else 1058c2ecf20Sopenharmony_ci /* Load the holding buffer */ 1068c2ecf20Sopenharmony_ci buf_hold = (*i_src++ & 0xFFFFFF00) >> 8; 1078c2ecf20Sopenharmony_ci 1088c2ecf20Sopenharmony_ci for (; c >= 4; c -= 4) { 1098c2ecf20Sopenharmony_ci value = *i_src++; 1108c2ecf20Sopenharmony_ci *i_dst++ = buf_hold | ((value & 0xFF) << 24); 1118c2ecf20Sopenharmony_ci buf_hold = (value & 0xFFFFFF00) >> 8; 1128c2ecf20Sopenharmony_ci } 1138c2ecf20Sopenharmony_ci#endif 1148c2ecf20Sopenharmony_ci /* Realign the source */ 1158c2ecf20Sopenharmony_ci src = (const void *)i_src; 1168c2ecf20Sopenharmony_ci src -= 3; 1178c2ecf20Sopenharmony_ci break; 1188c2ecf20Sopenharmony_ci case 0x2: /* Unaligned - Off by 2 */ 1198c2ecf20Sopenharmony_ci /* Word align the source */ 1208c2ecf20Sopenharmony_ci i_src = (const void *) ((unsigned)src & ~3); 1218c2ecf20Sopenharmony_ci#ifndef __MICROBLAZEEL__ 1228c2ecf20Sopenharmony_ci /* Load the holding buffer */ 1238c2ecf20Sopenharmony_ci buf_hold = *i_src++ << 16; 1248c2ecf20Sopenharmony_ci 1258c2ecf20Sopenharmony_ci for (; c >= 4; c -= 4) { 1268c2ecf20Sopenharmony_ci value = *i_src++; 1278c2ecf20Sopenharmony_ci *i_dst++ = buf_hold | value >> 16; 1288c2ecf20Sopenharmony_ci buf_hold = value << 16; 1298c2ecf20Sopenharmony_ci } 1308c2ecf20Sopenharmony_ci#else 1318c2ecf20Sopenharmony_ci /* Load the holding buffer */ 1328c2ecf20Sopenharmony_ci buf_hold = (*i_src++ & 0xFFFF0000) >> 16; 1338c2ecf20Sopenharmony_ci 1348c2ecf20Sopenharmony_ci for (; c >= 4; c -= 4) { 1358c2ecf20Sopenharmony_ci value = *i_src++; 1368c2ecf20Sopenharmony_ci *i_dst++ = buf_hold | ((value & 0xFFFF) << 16); 1378c2ecf20Sopenharmony_ci buf_hold = (value & 0xFFFF0000) >> 16; 1388c2ecf20Sopenharmony_ci } 1398c2ecf20Sopenharmony_ci#endif 1408c2ecf20Sopenharmony_ci /* Realign the source */ 1418c2ecf20Sopenharmony_ci src = (const void *)i_src; 1428c2ecf20Sopenharmony_ci src -= 2; 1438c2ecf20Sopenharmony_ci break; 1448c2ecf20Sopenharmony_ci case 0x3: /* Unaligned - Off by 3 */ 1458c2ecf20Sopenharmony_ci /* Word align the source */ 1468c2ecf20Sopenharmony_ci i_src = (const void *) ((unsigned)src & ~3); 1478c2ecf20Sopenharmony_ci#ifndef __MICROBLAZEEL__ 1488c2ecf20Sopenharmony_ci /* Load the holding buffer */ 1498c2ecf20Sopenharmony_ci buf_hold = *i_src++ << 24; 1508c2ecf20Sopenharmony_ci 1518c2ecf20Sopenharmony_ci for (; c >= 4; c -= 4) { 1528c2ecf20Sopenharmony_ci value = *i_src++; 1538c2ecf20Sopenharmony_ci *i_dst++ = buf_hold | value >> 8; 1548c2ecf20Sopenharmony_ci buf_hold = value << 24; 1558c2ecf20Sopenharmony_ci } 1568c2ecf20Sopenharmony_ci#else 1578c2ecf20Sopenharmony_ci /* Load the holding buffer */ 1588c2ecf20Sopenharmony_ci buf_hold = (*i_src++ & 0xFF000000) >> 24; 1598c2ecf20Sopenharmony_ci 1608c2ecf20Sopenharmony_ci for (; c >= 4; c -= 4) { 1618c2ecf20Sopenharmony_ci value = *i_src++; 1628c2ecf20Sopenharmony_ci *i_dst++ = buf_hold | ((value & 0xFFFFFF) << 8); 1638c2ecf20Sopenharmony_ci buf_hold = (value & 0xFF000000) >> 24; 1648c2ecf20Sopenharmony_ci } 1658c2ecf20Sopenharmony_ci#endif 1668c2ecf20Sopenharmony_ci /* Realign the source */ 1678c2ecf20Sopenharmony_ci src = (const void *)i_src; 1688c2ecf20Sopenharmony_ci src -= 1; 1698c2ecf20Sopenharmony_ci break; 1708c2ecf20Sopenharmony_ci } 1718c2ecf20Sopenharmony_ci dst = (void *)i_dst; 1728c2ecf20Sopenharmony_ci } 1738c2ecf20Sopenharmony_ci 1748c2ecf20Sopenharmony_ci /* Finish off any remaining bytes */ 1758c2ecf20Sopenharmony_ci /* simple fast copy, ... unless a cache boundary is crossed */ 1768c2ecf20Sopenharmony_ci switch (c) { 1778c2ecf20Sopenharmony_ci case 3: 1788c2ecf20Sopenharmony_ci *dst++ = *src++; 1798c2ecf20Sopenharmony_ci case 2: 1808c2ecf20Sopenharmony_ci *dst++ = *src++; 1818c2ecf20Sopenharmony_ci case 1: 1828c2ecf20Sopenharmony_ci *dst++ = *src++; 1838c2ecf20Sopenharmony_ci } 1848c2ecf20Sopenharmony_ci 1858c2ecf20Sopenharmony_ci return v_dst; 1868c2ecf20Sopenharmony_ci} 1878c2ecf20Sopenharmony_ci#endif /* CONFIG_OPT_LIB_FUNCTION */ 1888c2ecf20Sopenharmony_ciEXPORT_SYMBOL(memcpy); 1898c2ecf20Sopenharmony_ci#endif /* __HAVE_ARCH_MEMCPY */ 190