162306a36Sopenharmony_ci/*
262306a36Sopenharmony_ci * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
362306a36Sopenharmony_ci * Copyright (C) 2008-2009 PetaLogix
462306a36Sopenharmony_ci * Copyright (C) 2007 John Williams
562306a36Sopenharmony_ci *
 * Reasonably optimised generic C-code for memmove on Microblaze
762306a36Sopenharmony_ci * This is generic C code to do efficient, alignment-aware memmove.
862306a36Sopenharmony_ci *
962306a36Sopenharmony_ci * It is based on demo code originally Copyright 2001 by Intel Corp, taken from
1062306a36Sopenharmony_ci * http://www.embedded.com/showArticle.jhtml?articleID=19205567
1162306a36Sopenharmony_ci *
1262306a36Sopenharmony_ci * Attempts were made, unsuccessfully, to contact the original
1362306a36Sopenharmony_ci * author of this code (Michael Morrow, Intel).  Below is the original
1462306a36Sopenharmony_ci * copyright notice.
1562306a36Sopenharmony_ci *
1662306a36Sopenharmony_ci * This software has been developed by Intel Corporation.
1762306a36Sopenharmony_ci * Intel specifically disclaims all warranties, express or
1862306a36Sopenharmony_ci * implied, and all liability, including consequential and
1962306a36Sopenharmony_ci * other indirect damages, for the use of this program, including
2062306a36Sopenharmony_ci * liability for infringement of any proprietary rights,
2162306a36Sopenharmony_ci * and including the warranties of merchantability and fitness
2262306a36Sopenharmony_ci * for a particular purpose. Intel does not assume any
2362306a36Sopenharmony_ci * responsibility for and errors which may appear in this program
2462306a36Sopenharmony_ci * not any responsibility to update it.
2562306a36Sopenharmony_ci */
2662306a36Sopenharmony_ci
2762306a36Sopenharmony_ci#include <linux/export.h>
2862306a36Sopenharmony_ci#include <linux/types.h>
2962306a36Sopenharmony_ci#include <linux/stddef.h>
3062306a36Sopenharmony_ci#include <linux/compiler.h>
3162306a36Sopenharmony_ci#include <linux/string.h>
3262306a36Sopenharmony_ci
#ifdef CONFIG_OPT_LIB_FUNCTION
/**
 * memmove - copy a block of memory, tolerating overlapping regions
 * @v_dst: start of the destination region
 * @v_src: start of the source region
 * @c: number of bytes to copy
 *
 * If @c is zero nothing is touched.  If the destination starts at or below
 * the source, the work is delegated to memcpy().  Otherwise the copy runs
 * backwards, from the last byte to the first, so that source bytes that
 * overlap the destination are read before they are overwritten.
 *
 * For four or more bytes the backward copy first byte-copies until the
 * destination is word aligned and then moves whole 32-bit words.  When the
 * source is not word aligned at that point, each destination word is built
 * from two neighbouring aligned source words with shifts; this may be slow on
 * MicroBlaze systems without a barrel shifter.
 *
 * Return: @v_dst.
 */
void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
{
	const char *src = v_src;
	char *dst = v_dst;
	const uint32_t *i_src;
	uint32_t *i_dst;

	if (!c)
		return v_dst;

	/*
	 * Destination at or below the source: hand the job to memcpy().
	 * NOTE(review): this is only overlap-safe if memcpy() copies in
	 * ascending address order - confirm against the memcpy in use.
	 */
	if (v_dst <= v_src)
		return memcpy(v_dst, v_src, c);

	/* Descending copy: start one past the end of both regions. */
	dst += c;
	src += c;

	if (c >= 4) {
		/*
		 * value:    the aligned source word loaded most recently.
		 * buf_hold: bytes of the previously loaded source word that
		 *           still have to be stored, already moved to the
		 *           position they take in the destination word.
		 */
		uint32_t value, buf_hold;

		/*
		 * Byte-copy until the destination is word aligned.  At most
		 * three bytes are consumed, so c stays >= 1.
		 */
		switch ((unsigned long)dst & 3) {
		case 3:
			*--dst = *--src;
			--c;
			fallthrough;
		case 2:
			*--dst = *--src;
			--c;
			fallthrough;
		case 1:
			*--dst = *--src;
			--c;
		}

		i_dst = (void *)dst;

		/*
		 * Pick the word-copy scheme from the source alignment.  The
		 * value of (src & 3) is the number of bytes of the aligned
		 * word containing src[-1] that are still to be copied.
		 */
		switch ((unsigned long)src & 3) {
		case 0x0:	/* Source and destination both word aligned */
			i_src = (const void *)src;

			for (; c >= 4; c -= 4)
				*--i_dst = *--i_src;

			src = (const void *)i_src;
			break;
		case 0x1:	/* Unaligned - one pending byte */
			/* Point just past the aligned word holding src[-1] */
			i_src = (const void *)(((unsigned long)src + 4) & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Big endian: the pending byte is the top byte */
			buf_hold = *--i_src >> 24;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				/*
				 * The three highest-addressed bytes of the
				 * new word go on top, the pending byte at
				 * the bottom.
				 */
				*--i_dst = value << 8 | buf_hold;
				buf_hold = value >> 24;
			}
#else
			/* Little endian: the pending byte is the low byte */
			buf_hold = (*--i_src & 0xFF) << 24;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = buf_hold |
						((value & 0xFFFFFF00) >> 8);
				buf_hold = (value  & 0xFF) << 24;
			}
#endif
			/* Back to a byte pointer: skip the pending byte */
			src = (const void *)i_src;
			src += 1;
			break;
		case 0x2:	/* Unaligned - two pending bytes */
			/* Point just past the aligned word holding src[-1] */
			i_src = (const void *)(((unsigned long)src + 4) & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Big endian: pending bytes are the top half-word */
			buf_hold = *--i_src >> 16;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				/*
				 * Low half of the new word on top, pending
				 * half-word at the bottom.
				 */
				*--i_dst = value << 16 | buf_hold;
				buf_hold = value >> 16;
			}
#else
			/* Little endian: pending bytes are the low half-word */
			buf_hold = (*--i_src & 0xFFFF) << 16;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = buf_hold |
						((value & 0xFFFF0000) >> 16);
				buf_hold = (value & 0xFFFF) << 16;
			}
#endif
			/* Back to a byte pointer: skip the two pending bytes */
			src = (const void *)i_src;
			src += 2;
			break;
		case 0x3:	/* Unaligned - three pending bytes */
			/* Point just past the aligned word holding src[-1] */
			i_src = (const void *)(((unsigned long)src + 4) & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Big endian: pending bytes are the top three bytes */
			buf_hold = *--i_src >> 8;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				/*
				 * Lowest byte of the new word on top, the
				 * three pending bytes below it.
				 */
				*--i_dst = value << 24 | buf_hold;
				buf_hold = value >> 8;
			}
#else
			/* Little endian: pending bytes are the low three */
			buf_hold = (*--i_src & 0xFFFFFF) << 8;

			for (; c >= 4; c -= 4) {
				value = *--i_src;
				*--i_dst = buf_hold |
						((value & 0xFF000000) >> 24);
				buf_hold = (value & 0xFFFFFF) << 8;
			}
#endif
			/* Back to a byte pointer: skip the three pending bytes */
			src = (const void *)i_src;
			src += 3;
			break;
		}
		dst = (void *)i_dst;
	}

	/*
	 * Finish off the remaining bytes one at a time.  Every word loop
	 * above runs until c < 4, so "case 4" is never taken after the word
	 * path; it is kept as a harmless safety net.
	 */
	switch (c) {
	case 4:
		*--dst = *--src;
		fallthrough;
	case 3:
		*--dst = *--src;
		fallthrough;
	case 2:
		*--dst = *--src;
		fallthrough;
	case 1:
		*--dst = *--src;
	}
	return v_dst;
}
EXPORT_SYMBOL(memmove);
#endif /* CONFIG_OPT_LIB_FUNCTION */
196