18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
38c2ecf20Sopenharmony_ci * Copyright (C) 2008-2009 PetaLogix
48c2ecf20Sopenharmony_ci * Copyright (C) 2007 John Williams
58c2ecf20Sopenharmony_ci *
 * Reasonably optimised generic C-code for memmove on Microblaze
78c2ecf20Sopenharmony_ci * This is generic C code to do efficient, alignment-aware memmove.
88c2ecf20Sopenharmony_ci *
98c2ecf20Sopenharmony_ci * It is based on demo code originally Copyright 2001 by Intel Corp, taken from
108c2ecf20Sopenharmony_ci * http://www.embedded.com/showArticle.jhtml?articleID=19205567
118c2ecf20Sopenharmony_ci *
128c2ecf20Sopenharmony_ci * Attempts were made, unsuccessfully, to contact the original
138c2ecf20Sopenharmony_ci * author of this code (Michael Morrow, Intel).  Below is the original
148c2ecf20Sopenharmony_ci * copyright notice.
158c2ecf20Sopenharmony_ci *
168c2ecf20Sopenharmony_ci * This software has been developed by Intel Corporation.
178c2ecf20Sopenharmony_ci * Intel specifically disclaims all warranties, express or
188c2ecf20Sopenharmony_ci * implied, and all liability, including consequential and
198c2ecf20Sopenharmony_ci * other indirect damages, for the use of this program, including
208c2ecf20Sopenharmony_ci * liability for infringement of any proprietary rights,
218c2ecf20Sopenharmony_ci * and including the warranties of merchantability and fitness
228c2ecf20Sopenharmony_ci * for a particular purpose. Intel does not assume any
238c2ecf20Sopenharmony_ci * responsibility for and errors which may appear in this program
248c2ecf20Sopenharmony_ci * not any responsibility to update it.
258c2ecf20Sopenharmony_ci */
268c2ecf20Sopenharmony_ci
278c2ecf20Sopenharmony_ci#include <linux/export.h>
288c2ecf20Sopenharmony_ci#include <linux/types.h>
298c2ecf20Sopenharmony_ci#include <linux/stddef.h>
308c2ecf20Sopenharmony_ci#include <linux/compiler.h>
318c2ecf20Sopenharmony_ci#include <linux/string.h>
328c2ecf20Sopenharmony_ci
338c2ecf20Sopenharmony_ci#ifdef __HAVE_ARCH_MEMMOVE
348c2ecf20Sopenharmony_ci#ifndef CONFIG_OPT_LIB_FUNCTION
/*
 * Byte-oriented memmove: copy c bytes from v_src to v_dst and return v_dst.
 *
 * A zero length is a no-op.  When the destination does not lie above the
 * source the copy is delegated to memcpy(); otherwise the bytes are moved
 * one at a time starting from the last byte, so that an overlapping source
 * is read before it is overwritten.
 */
void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
{
	const char *from;
	char *to;
	__kernel_size_t left;

	if (c == 0)
		return v_dst;

	/* Source at or above destination: hand over to memcpy */
	if (v_src >= v_dst)
		return memcpy(v_dst, v_src, c);

	/* Start one past the final byte of each region and walk downwards */
	from = (const char *)v_src + c;
	to = (char *)v_dst + c;

	for (left = c; left != 0; left--) {
		--to;
		--from;
		*to = *from;
	}

	return v_dst;
}
578c2ecf20Sopenharmony_ci#else /* CONFIG_OPT_LIB_FUNCTION */
/*
 * Word-oriented memmove: copy c bytes from v_src to v_dst and return v_dst.
 * The two regions may overlap.
 *
 * A zero length is a no-op.  When the destination does not lie above the
 * source the copy is delegated to memcpy().
 * NOTE(review): the regions can still overlap in that case, so this
 * presumably relies on memcpy() copying in ascending address order -
 * confirm against the memcpy implementation that is linked in.
 *
 * Otherwise the copy runs from the last byte towards the first.  For
 * c >= 4 the destination is byte-copied down to a 32-bit boundary and the
 * bulk is then moved one word at a time.  If the source is at a different
 * offset within a word than the destination, every output word is
 * assembled from two neighbouring aligned source words using shifts (slow
 * on MicroBlaze systems without a barrel shifter).  In that mode aligned
 * source words are always loaded whole, so up to three bytes next to each
 * end of the source region are read (never written).
 */
void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
{
	const char *src = v_src;
	char *dst = v_dst;
	const uint32_t *i_src;
	uint32_t *i_dst;

	if (!c)
		return v_dst;

	/* Use memcpy when source is higher than (or equal to) dest */
	if (v_dst <= v_src)
		return memcpy(v_dst, v_src, c);

	/* FIXME this part needs more test */
	/* Descending copy: both pointers start one past the last byte. */
	dst += c;
	src += c;

	if (c >= 4) {
		/* 32-bit lanes: the shift amounts below assume this width. */
		uint32_t value, buf_hold;

		/*
		 * Align the destination to a word boundary by copying the
		 * 1-3 trailing bytes individually.  Since c >= 4 here, at
		 * least one byte is left afterwards.
		 */
		switch ((unsigned long)dst & 3) {
		case 3:
			*--dst = *--src;
			--c;
			/* fall through */
		case 2:
			*--dst = *--src;
			--c;
			/* fall through */
		case 1:
			*--dst = *--src;
			--c;
		}

		i_dst = (void *)dst;
		/*
		 * Choose a copy scheme based on the source alignment
		 * relative to the (now word aligned) destination.
		 */
		switch ((unsigned long)src & 3) {
		case 0x0:	/* Both byte offsets are aligned */

			i_src  = (const void *)src;

			for (; c >= 4; c -= 4)
				*--i_dst = *--i_src;

			src  = (const void *)i_src;
			break;
		case 0x1:	/* Unaligned - Off by 1 */
			/*
			 * Round the source up to the next word boundary; the
			 * first pre-decrement then lands on the word whose
			 * lowest-addressed byte is the last source byte.
			 */
			i_src = (const void *)(((unsigned long)src + 4) & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Big-endian: hold that byte in the low 8 bits */
			buf_hold = *--i_src >> 24;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				/*
				 * Lower three bytes of the new word move up,
				 * the held byte fills the vacated low lane.
				 */
				*--i_dst = value << 8 | buf_hold;
				buf_hold = value >> 24;
			}
#else
			/* Little-endian: hold that byte in the top 8 bits */
			buf_hold = (*--i_src & 0xFF) << 24;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = buf_hold |
						((value & 0xFFFFFF00) >> 8);
				buf_hold = (value  & 0xFF) << 24;
			}
#endif
			/*
			 * Realign the source: the one held byte has been read
			 * but not stored yet.
			 */
			src = (const void *)i_src;
			src += 1;
			break;
		case 0x2:	/* Unaligned - Off by 2 */
			/* Word align the source (see case 0x1) */
			i_src = (const void *)(((unsigned long)src + 4) & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Big-endian: hold two bytes in the low 16 bits */
			buf_hold = *--i_src >> 16;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = value << 16 | buf_hold;
				buf_hold = value >> 16;
			}
#else
			/* Little-endian: hold two bytes in the top 16 bits */
			buf_hold = (*--i_src & 0xFFFF) << 16;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = buf_hold |
						((value & 0xFFFF0000) >> 16);
				buf_hold = (value & 0xFFFF) << 16;
			}
#endif
			/* Realign the source: two held bytes are still pending */
			src = (const void *)i_src;
			src += 2;
			break;
		case 0x3:	/* Unaligned - Off by 3 */
			/* Word align the source (see case 0x1) */
			i_src = (const void *)(((unsigned long)src + 4) & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Big-endian: hold three bytes in the low 24 bits */
			buf_hold = *--i_src >> 8;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = value << 24 | buf_hold;
				buf_hold = value >> 8;
			}
#else
			/* Little-endian: hold three bytes in the top 24 bits */
			buf_hold = (*--i_src & 0xFFFFFF) << 8;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = buf_hold |
						((value & 0xFF000000) >> 24);
				buf_hold = (value & 0xFFFFFF) << 8;
			}
#endif
			/* Realign the source: three held bytes are still pending */
			src = (const void *)i_src;
			src += 3;
			break;
		}
		dst = (void *)i_dst;
	}

	/*
	 * Finish off any remaining bytes.  Every word loop above runs until
	 * c < 4, so only 0-3 bytes can be left; case 4 is kept as a harmless
	 * safeguard.
	 */
	switch (c) {
	case 4:
		*--dst = *--src;
		/* fall through */
	case 3:
		*--dst = *--src;
		/* fall through */
	case 2:
		*--dst = *--src;
		/* fall through */
	case 1:
		*--dst = *--src;
	}
	return v_dst;
}
2138c2ecf20Sopenharmony_ci#endif /* CONFIG_OPT_LIB_FUNCTION */
2148c2ecf20Sopenharmony_ciEXPORT_SYMBOL(memmove);
2158c2ecf20Sopenharmony_ci#endif /* __HAVE_ARCH_MEMMOVE */
216