18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
38c2ecf20Sopenharmony_ci * Copyright (C) 2008-2009 PetaLogix
48c2ecf20Sopenharmony_ci * Copyright (C) 2007 John Williams
58c2ecf20Sopenharmony_ci *
68c2ecf20Sopenharmony_ci * Reasonably optimised generic C-code for memcpy on Microblaze
78c2ecf20Sopenharmony_ci * This is generic C code to do efficient, alignment-aware memcpy.
88c2ecf20Sopenharmony_ci *
98c2ecf20Sopenharmony_ci * It is based on demo code originally Copyright 2001 by Intel Corp, taken from
108c2ecf20Sopenharmony_ci * http://www.embedded.com/showArticle.jhtml?articleID=19205567
118c2ecf20Sopenharmony_ci *
128c2ecf20Sopenharmony_ci * Attempts were made, unsuccessfully, to contact the original
138c2ecf20Sopenharmony_ci * author of this code (Michael Morrow, Intel).  Below is the original
148c2ecf20Sopenharmony_ci * copyright notice.
158c2ecf20Sopenharmony_ci *
168c2ecf20Sopenharmony_ci * This software has been developed by Intel Corporation.
178c2ecf20Sopenharmony_ci * Intel specifically disclaims all warranties, express or
188c2ecf20Sopenharmony_ci * implied, and all liability, including consequential and
198c2ecf20Sopenharmony_ci * other indirect damages, for the use of this program, including
208c2ecf20Sopenharmony_ci * liability for infringement of any proprietary rights,
218c2ecf20Sopenharmony_ci * and including the warranties of merchantability and fitness
228c2ecf20Sopenharmony_ci * for a particular purpose. Intel does not assume any
238c2ecf20Sopenharmony_ci * responsibility for and errors which may appear in this program
248c2ecf20Sopenharmony_ci * not any responsibility to update it.
258c2ecf20Sopenharmony_ci */
268c2ecf20Sopenharmony_ci
278c2ecf20Sopenharmony_ci#include <linux/export.h>
288c2ecf20Sopenharmony_ci#include <linux/types.h>
298c2ecf20Sopenharmony_ci#include <linux/stddef.h>
308c2ecf20Sopenharmony_ci#include <linux/compiler.h>
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_ci#include <linux/string.h>
338c2ecf20Sopenharmony_ci
348c2ecf20Sopenharmony_ci#ifdef __HAVE_ARCH_MEMCPY
358c2ecf20Sopenharmony_ci#ifndef CONFIG_OPT_LIB_FUNCTION
/*
 * memcpy - plain byte-at-a-time copy, built when CONFIG_OPT_LIB_FUNCTION
 * is not set.
 *
 * @v_dst: destination buffer
 * @v_src: source buffer
 * @c:     number of bytes to copy
 *
 * Bytes are copied in ascending address order, one per iteration.
 * Returns v_dst.
 */
void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
{
	const char *from = v_src;
	char *to = v_dst;
	__kernel_size_t i;

	/* Walk both buffers front to back with a shared index. */
	for (i = 0; i < c; i++)
		to[i] = from[i];

	return v_dst;
}
478c2ecf20Sopenharmony_ci#else /* CONFIG_OPT_LIB_FUNCTION */
/*
 * memcpy - alignment-aware copy that moves data in 32-bit words.
 *
 * @v_dst: destination buffer
 * @v_src: source buffer
 * @c:     number of bytes to copy
 *
 * Copies shorter than four bytes go straight to the byte-wise tail.
 * Otherwise the destination is first advanced to a 4-byte boundary with
 * byte stores, and from then on only whole words are stored.  If the source
 * is word aligned at that point, words are copied directly.  If it is not,
 * every destination word is assembled from two consecutive aligned source
 * words using shifts, which may be slow on MicroBlaze systems without a
 * barrel shifter.
 *
 * In the unaligned cases the first load fetches the aligned word that
 * contains src, so up to three bytes in front of src are read and then
 * discarded.
 *
 * Returns v_dst.
 */
void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
{
	const char *src = v_src;
	char *dst = v_dst;
	const uint32_t *i_src;
	uint32_t *i_dst;

	if (likely(c >= 4)) {
		/* value: word just loaded; buf_hold: bytes carried over */
		uint32_t value, buf_hold;

		/*
		 * Align the destination to a word boundary, one byte at a
		 * time.  The cases cascade on purpose: an offset of 1 needs
		 * three byte copies, 2 needs two and 3 needs one.  This is
		 * done in an endian independent manner.
		 */
		switch ((unsigned long)dst & 3) {
		case 1:
			*dst++ = *src++;
			--c;
			/* fallthrough */
		case 2:
			*dst++ = *src++;
			--c;
			/* fallthrough */
		case 3:
			*dst++ = *src++;
			--c;
		}

		i_dst = (void *)dst;

		/*
		 * Choose a copy scheme based on the source alignment
		 * relative to the (now word aligned) destination.
		 */
		switch ((unsigned long)src & 3) {
		case 0x0:	/* Both byte offsets are aligned */
			i_src  = (const void *)src;

			for (; c >= 4; c -= 4)
				*i_dst++ = *i_src++;

			src  = (const void *)i_src;
			break;
		case 0x1:	/* Unaligned - Off by 1 */
			/*
			 * Word align the source.  The mask is applied at
			 * full pointer width so that no address bits are
			 * dropped.
			 */
			i_src = (const void *) ((unsigned long)src & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer: drop the 1 leading byte */
			buf_hold = *i_src++ << 8;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | value >> 24;
				buf_hold = value << 8;
			}
#else
			/* Load the holding buffer: drop the 1 leading byte */
			buf_hold = (*i_src++ & 0xFFFFFF00) >> 8;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | ((value & 0xFF) << 24);
				buf_hold = (value & 0xFFFFFF00) >> 8;
			}
#endif
			/*
			 * Realign the source: the last word loaded still has
			 * 3 bytes that were not stored, so step back onto
			 * them for the byte-wise tail.
			 */
			src = (const void *)i_src;
			src -= 3;
			break;
		case 0x2:	/* Unaligned - Off by 2 */
			/* Word align the source (full pointer width) */
			i_src = (const void *) ((unsigned long)src & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer: drop the 2 leading bytes */
			buf_hold = *i_src++ << 16;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | value >> 16;
				buf_hold = value << 16;
			}
#else
			/* Load the holding buffer: drop the 2 leading bytes */
			buf_hold = (*i_src++ & 0xFFFF0000) >> 16;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | ((value & 0xFFFF) << 16);
				buf_hold = (value & 0xFFFF0000) >> 16;
			}
#endif
			/* Realign the source: 2 loaded bytes are still pending */
			src = (const void *)i_src;
			src -= 2;
			break;
		case 0x3:	/* Unaligned - Off by 3 */
			/* Word align the source (full pointer width) */
			i_src = (const void *) ((unsigned long)src & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer: drop the 3 leading bytes */
			buf_hold = *i_src++ << 24;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | value >> 8;
				buf_hold = value << 24;
			}
#else
			/* Load the holding buffer: drop the 3 leading bytes */
			buf_hold = (*i_src++ & 0xFF000000) >> 24;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | ((value & 0xFFFFFF) << 8);
				buf_hold = (value & 0xFF000000) >> 24;
			}
#endif
			/* Realign the source: 1 loaded byte is still pending */
			src = (const void *)i_src;
			src -= 1;
			break;
		}
		dst = (void *)i_dst;
	}

	/*
	 * Finish off any remaining bytes (at most 3 at this point).  The
	 * cases cascade on purpose so that exactly c bytes are copied.
	 */
	switch (c) {
	case 3:
		*dst++ = *src++;
		/* fallthrough */
	case 2:
		*dst++ = *src++;
		/* fallthrough */
	case 1:
		*dst++ = *src++;
	}

	return v_dst;
}
1878c2ecf20Sopenharmony_ci#endif /* CONFIG_OPT_LIB_FUNCTION */
1888c2ecf20Sopenharmony_ciEXPORT_SYMBOL(memcpy);
1898c2ecf20Sopenharmony_ci#endif /* __HAVE_ARCH_MEMCPY */
190