162306a36Sopenharmony_ci/* 262306a36Sopenharmony_ci * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu> 362306a36Sopenharmony_ci * Copyright (C) 2008-2009 PetaLogix 462306a36Sopenharmony_ci * Copyright (C) 2007 John Williams 562306a36Sopenharmony_ci * 662306a36Sopenharmony_ci * Reasonably optimised generic C-code for memcpy on Microblaze 762306a36Sopenharmony_ci * This is generic C code to do efficient, alignment-aware memmove. 862306a36Sopenharmony_ci * 962306a36Sopenharmony_ci * It is based on demo code originally Copyright 2001 by Intel Corp, taken from 1062306a36Sopenharmony_ci * http://www.embedded.com/showArticle.jhtml?articleID=19205567 1162306a36Sopenharmony_ci * 1262306a36Sopenharmony_ci * Attempts were made, unsuccessfully, to contact the original 1362306a36Sopenharmony_ci * author of this code (Michael Morrow, Intel). Below is the original 1462306a36Sopenharmony_ci * copyright notice. 1562306a36Sopenharmony_ci * 1662306a36Sopenharmony_ci * This software has been developed by Intel Corporation. 1762306a36Sopenharmony_ci * Intel specifically disclaims all warranties, express or 1862306a36Sopenharmony_ci * implied, and all liability, including consequential and 1962306a36Sopenharmony_ci * other indirect damages, for the use of this program, including 2062306a36Sopenharmony_ci * liability for infringement of any proprietary rights, 2162306a36Sopenharmony_ci * and including the warranties of merchantability and fitness 2262306a36Sopenharmony_ci * for a particular purpose. Intel does not assume any 2362306a36Sopenharmony_ci * responsibility for and errors which may appear in this program 2462306a36Sopenharmony_ci * not any responsibility to update it. 2562306a36Sopenharmony_ci */ 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_ci#include <linux/export.h> 2862306a36Sopenharmony_ci#include <linux/types.h> 2962306a36Sopenharmony_ci#include <linux/stddef.h> 3062306a36Sopenharmony_ci#include <linux/compiler.h> 3162306a36Sopenharmony_ci#include <linux/string.h> 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_ci#ifdef CONFIG_OPT_LIB_FUNCTION 3462306a36Sopenharmony_civoid *memmove(void *v_dst, const void *v_src, __kernel_size_t c) 3562306a36Sopenharmony_ci{ 3662306a36Sopenharmony_ci const char *src = v_src; 3762306a36Sopenharmony_ci char *dst = v_dst; 3862306a36Sopenharmony_ci const uint32_t *i_src; 3962306a36Sopenharmony_ci uint32_t *i_dst; 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_ci if (!c) 4262306a36Sopenharmony_ci return v_dst; 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_ci /* Use memcpy when source is higher than dest */ 4562306a36Sopenharmony_ci if (v_dst <= v_src) 4662306a36Sopenharmony_ci return memcpy(v_dst, v_src, c); 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci /* The following code tries to optimize the copy by using unsigned 4962306a36Sopenharmony_ci * alignment. This will work fine if both source and destination are 5062306a36Sopenharmony_ci * aligned on the same boundary. However, if they are aligned on 5162306a36Sopenharmony_ci * different boundaries shifts will be necessary. This might result in 5262306a36Sopenharmony_ci * bad performance on MicroBlaze systems without a barrel shifter. 5362306a36Sopenharmony_ci */ 5462306a36Sopenharmony_ci /* FIXME this part needs more test */ 5562306a36Sopenharmony_ci /* Do a descending copy - this is a bit trickier! */ 5662306a36Sopenharmony_ci dst += c; 5762306a36Sopenharmony_ci src += c; 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_ci if (c >= 4) { 6062306a36Sopenharmony_ci unsigned value, buf_hold; 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_ci /* Align the destination to a word boundary. */ 6362306a36Sopenharmony_ci /* This is done in an endian independent manner. */ 6462306a36Sopenharmony_ci 6562306a36Sopenharmony_ci switch ((unsigned long)dst & 3) { 6662306a36Sopenharmony_ci case 3: 6762306a36Sopenharmony_ci *--dst = *--src; 6862306a36Sopenharmony_ci --c; 6962306a36Sopenharmony_ci fallthrough; 7062306a36Sopenharmony_ci case 2: 7162306a36Sopenharmony_ci *--dst = *--src; 7262306a36Sopenharmony_ci --c; 7362306a36Sopenharmony_ci fallthrough; 7462306a36Sopenharmony_ci case 1: 7562306a36Sopenharmony_ci *--dst = *--src; 7662306a36Sopenharmony_ci --c; 7762306a36Sopenharmony_ci } 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_ci i_dst = (void *)dst; 8062306a36Sopenharmony_ci /* Choose a copy scheme based on the source */ 8162306a36Sopenharmony_ci /* alignment relative to destination. */ 8262306a36Sopenharmony_ci switch ((unsigned long)src & 3) { 8362306a36Sopenharmony_ci case 0x0: /* Both byte offsets are aligned */ 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci i_src = (const void *)src; 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_ci for (; c >= 4; c -= 4) 8862306a36Sopenharmony_ci *--i_dst = *--i_src; 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_ci src = (const void *)i_src; 9162306a36Sopenharmony_ci break; 9262306a36Sopenharmony_ci case 0x1: /* Unaligned - Off by 1 */ 9362306a36Sopenharmony_ci /* Word align the source */ 9462306a36Sopenharmony_ci i_src = (const void *) (((unsigned)src + 4) & ~3); 9562306a36Sopenharmony_ci#ifndef __MICROBLAZEEL__ 9662306a36Sopenharmony_ci /* Load the holding buffer */ 9762306a36Sopenharmony_ci buf_hold = *--i_src >> 24; 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_ci for (; c >= 4; c -= 4) { 10062306a36Sopenharmony_ci value = *--i_src; 10162306a36Sopenharmony_ci *--i_dst = buf_hold << 8 | value; 10262306a36Sopenharmony_ci buf_hold = value >> 24; 10362306a36Sopenharmony_ci } 10462306a36Sopenharmony_ci#else 10562306a36Sopenharmony_ci /* Load the holding buffer */ 10662306a36Sopenharmony_ci buf_hold = (*--i_src & 0xFF) << 24; 10762306a36Sopenharmony_ci 10862306a36Sopenharmony_ci for (; c >= 4; c -= 4) { 10962306a36Sopenharmony_ci value = *--i_src; 11062306a36Sopenharmony_ci *--i_dst = buf_hold | 11162306a36Sopenharmony_ci ((value & 0xFFFFFF00) >> 8); 11262306a36Sopenharmony_ci buf_hold = (value & 0xFF) << 24; 11362306a36Sopenharmony_ci } 11462306a36Sopenharmony_ci#endif 11562306a36Sopenharmony_ci /* Realign the source */ 11662306a36Sopenharmony_ci src = (const void *)i_src; 11762306a36Sopenharmony_ci src += 1; 11862306a36Sopenharmony_ci break; 11962306a36Sopenharmony_ci case 0x2: /* Unaligned - Off by 2 */ 12062306a36Sopenharmony_ci /* Word align the source */ 12162306a36Sopenharmony_ci i_src = (const void *) (((unsigned)src + 4) & ~3); 12262306a36Sopenharmony_ci#ifndef __MICROBLAZEEL__ 12362306a36Sopenharmony_ci /* Load the holding buffer */ 12462306a36Sopenharmony_ci buf_hold = *--i_src >> 16; 12562306a36Sopenharmony_ci 12662306a36Sopenharmony_ci for (; c >= 4; c -= 4) { 12762306a36Sopenharmony_ci value = *--i_src; 12862306a36Sopenharmony_ci *--i_dst = buf_hold << 16 | value; 12962306a36Sopenharmony_ci buf_hold = value >> 16; 13062306a36Sopenharmony_ci } 13162306a36Sopenharmony_ci#else 13262306a36Sopenharmony_ci /* Load the holding buffer */ 13362306a36Sopenharmony_ci buf_hold = (*--i_src & 0xFFFF) << 16; 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_ci for (; c >= 4; c -= 4) { 13662306a36Sopenharmony_ci value = *--i_src; 13762306a36Sopenharmony_ci *--i_dst = buf_hold | 13862306a36Sopenharmony_ci ((value & 0xFFFF0000) >> 16); 13962306a36Sopenharmony_ci buf_hold = (value & 0xFFFF) << 16; 14062306a36Sopenharmony_ci } 14162306a36Sopenharmony_ci#endif 14262306a36Sopenharmony_ci /* Realign the source */ 14362306a36Sopenharmony_ci src = (const void *)i_src; 14462306a36Sopenharmony_ci src += 2; 14562306a36Sopenharmony_ci break; 14662306a36Sopenharmony_ci case 0x3: /* Unaligned - Off by 3 */ 14762306a36Sopenharmony_ci /* Word align the source */ 14862306a36Sopenharmony_ci i_src = (const void *) (((unsigned)src + 4) & ~3); 14962306a36Sopenharmony_ci#ifndef __MICROBLAZEEL__ 15062306a36Sopenharmony_ci /* Load the holding buffer */ 15162306a36Sopenharmony_ci buf_hold = *--i_src >> 8; 15262306a36Sopenharmony_ci 15362306a36Sopenharmony_ci for (; c >= 4; c -= 4) { 15462306a36Sopenharmony_ci value = *--i_src; 15562306a36Sopenharmony_ci *--i_dst = buf_hold << 24 | value; 15662306a36Sopenharmony_ci buf_hold = value >> 8; 15762306a36Sopenharmony_ci } 15862306a36Sopenharmony_ci#else 15962306a36Sopenharmony_ci /* Load the holding buffer */ 16062306a36Sopenharmony_ci buf_hold = (*--i_src & 0xFFFFFF) << 8; 16162306a36Sopenharmony_ci 16262306a36Sopenharmony_ci for (; c >= 4; c -= 4) { 16362306a36Sopenharmony_ci value = *--i_src; 16462306a36Sopenharmony_ci *--i_dst = buf_hold | 16562306a36Sopenharmony_ci ((value & 0xFF000000) >> 24); 16662306a36Sopenharmony_ci buf_hold = (value & 0xFFFFFF) << 8; 16762306a36Sopenharmony_ci } 16862306a36Sopenharmony_ci#endif 16962306a36Sopenharmony_ci /* Realign the source */ 17062306a36Sopenharmony_ci src = (const void *)i_src; 17162306a36Sopenharmony_ci src += 3; 17262306a36Sopenharmony_ci break; 17362306a36Sopenharmony_ci } 17462306a36Sopenharmony_ci dst = (void *)i_dst; 17562306a36Sopenharmony_ci } 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci /* simple fast copy, ... unless a cache boundary is crossed */ 17862306a36Sopenharmony_ci /* Finish off any remaining bytes */ 17962306a36Sopenharmony_ci switch (c) { 18062306a36Sopenharmony_ci case 4: 18162306a36Sopenharmony_ci *--dst = *--src; 18262306a36Sopenharmony_ci fallthrough; 18362306a36Sopenharmony_ci case 3: 18462306a36Sopenharmony_ci *--dst = *--src; 18562306a36Sopenharmony_ci fallthrough; 18662306a36Sopenharmony_ci case 2: 18762306a36Sopenharmony_ci *--dst = *--src; 18862306a36Sopenharmony_ci fallthrough; 18962306a36Sopenharmony_ci case 1: 19062306a36Sopenharmony_ci *--dst = *--src; 19162306a36Sopenharmony_ci } 19262306a36Sopenharmony_ci return v_dst; 19362306a36Sopenharmony_ci} 19462306a36Sopenharmony_ciEXPORT_SYMBOL(memmove); 19562306a36Sopenharmony_ci#endif /* CONFIG_OPT_LIB_FUNCTION */ 196