162306a36Sopenharmony_ci/*
262306a36Sopenharmony_ci * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
362306a36Sopenharmony_ci * Copyright (C) 2008-2009 PetaLogix
462306a36Sopenharmony_ci * Copyright (C) 2007 John Williams
562306a36Sopenharmony_ci *
662306a36Sopenharmony_ci * Reasonably optimised generic C-code for memcpy on Microblaze
762306a36Sopenharmony_ci * This is generic C code to do efficient, alignment-aware memcpy.
862306a36Sopenharmony_ci *
962306a36Sopenharmony_ci * It is based on demo code originally Copyright 2001 by Intel Corp, taken from
1062306a36Sopenharmony_ci * http://www.embedded.com/showArticle.jhtml?articleID=19205567
1162306a36Sopenharmony_ci *
1262306a36Sopenharmony_ci * Attempts were made, unsuccessfully, to contact the original
1362306a36Sopenharmony_ci * author of this code (Michael Morrow, Intel).  Below is the original
1462306a36Sopenharmony_ci * copyright notice.
1562306a36Sopenharmony_ci *
1662306a36Sopenharmony_ci * This software has been developed by Intel Corporation.
1762306a36Sopenharmony_ci * Intel specifically disclaims all warranties, express or
1862306a36Sopenharmony_ci * implied, and all liability, including consequential and
1962306a36Sopenharmony_ci * other indirect damages, for the use of this program, including
2062306a36Sopenharmony_ci * liability for infringement of any proprietary rights,
2162306a36Sopenharmony_ci * and including the warranties of merchantability and fitness
2262306a36Sopenharmony_ci * for a particular purpose. Intel does not assume any
2362306a36Sopenharmony_ci * responsibility for and errors which may appear in this program
2462306a36Sopenharmony_ci * not any responsibility to update it.
2562306a36Sopenharmony_ci */
2662306a36Sopenharmony_ci
2762306a36Sopenharmony_ci#include <linux/export.h>
2862306a36Sopenharmony_ci#include <linux/types.h>
2962306a36Sopenharmony_ci#include <linux/stddef.h>
3062306a36Sopenharmony_ci#include <linux/compiler.h>
3162306a36Sopenharmony_ci
3262306a36Sopenharmony_ci#include <linux/string.h>
3362306a36Sopenharmony_ci
3462306a36Sopenharmony_ci#ifdef CONFIG_OPT_LIB_FUNCTION
3562306a36Sopenharmony_civoid *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
3662306a36Sopenharmony_ci{
3762306a36Sopenharmony_ci	const char *src = v_src;
3862306a36Sopenharmony_ci	char *dst = v_dst;
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_ci	/* The following code tries to optimize the copy by using unsigned
4162306a36Sopenharmony_ci	 * alignment. This will work fine if both source and destination are
4262306a36Sopenharmony_ci	 * aligned on the same boundary. However, if they are aligned on
4362306a36Sopenharmony_ci	 * different boundaries shifts will be necessary. This might result in
4462306a36Sopenharmony_ci	 * bad performance on MicroBlaze systems without a barrel shifter.
4562306a36Sopenharmony_ci	 */
4662306a36Sopenharmony_ci	const uint32_t *i_src;
4762306a36Sopenharmony_ci	uint32_t *i_dst;
4862306a36Sopenharmony_ci
4962306a36Sopenharmony_ci	if (likely(c >= 4)) {
5062306a36Sopenharmony_ci		unsigned  value, buf_hold;
5162306a36Sopenharmony_ci
5262306a36Sopenharmony_ci		/* Align the destination to a word boundary. */
5362306a36Sopenharmony_ci		/* This is done in an endian independent manner. */
5462306a36Sopenharmony_ci		switch ((unsigned long)dst & 3) {
5562306a36Sopenharmony_ci		case 1:
5662306a36Sopenharmony_ci			*dst++ = *src++;
5762306a36Sopenharmony_ci			--c;
5862306a36Sopenharmony_ci			fallthrough;
5962306a36Sopenharmony_ci		case 2:
6062306a36Sopenharmony_ci			*dst++ = *src++;
6162306a36Sopenharmony_ci			--c;
6262306a36Sopenharmony_ci			fallthrough;
6362306a36Sopenharmony_ci		case 3:
6462306a36Sopenharmony_ci			*dst++ = *src++;
6562306a36Sopenharmony_ci			--c;
6662306a36Sopenharmony_ci		}
6762306a36Sopenharmony_ci
6862306a36Sopenharmony_ci		i_dst = (void *)dst;
6962306a36Sopenharmony_ci
7062306a36Sopenharmony_ci		/* Choose a copy scheme based on the source */
7162306a36Sopenharmony_ci		/* alignment relative to destination. */
7262306a36Sopenharmony_ci		switch ((unsigned long)src & 3) {
7362306a36Sopenharmony_ci		case 0x0:	/* Both byte offsets are aligned */
7462306a36Sopenharmony_ci			i_src  = (const void *)src;
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_ci			for (; c >= 4; c -= 4)
7762306a36Sopenharmony_ci				*i_dst++ = *i_src++;
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_ci			src  = (const void *)i_src;
8062306a36Sopenharmony_ci			break;
8162306a36Sopenharmony_ci		case 0x1:	/* Unaligned - Off by 1 */
8262306a36Sopenharmony_ci			/* Word align the source */
8362306a36Sopenharmony_ci			i_src = (const void *) ((unsigned)src & ~3);
8462306a36Sopenharmony_ci#ifndef __MICROBLAZEEL__
8562306a36Sopenharmony_ci			/* Load the holding buffer */
8662306a36Sopenharmony_ci			buf_hold = *i_src++ << 8;
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci			for (; c >= 4; c -= 4) {
8962306a36Sopenharmony_ci				value = *i_src++;
9062306a36Sopenharmony_ci				*i_dst++ = buf_hold | value >> 24;
9162306a36Sopenharmony_ci				buf_hold = value << 8;
9262306a36Sopenharmony_ci			}
9362306a36Sopenharmony_ci#else
9462306a36Sopenharmony_ci			/* Load the holding buffer */
9562306a36Sopenharmony_ci			buf_hold = (*i_src++ & 0xFFFFFF00) >> 8;
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci			for (; c >= 4; c -= 4) {
9862306a36Sopenharmony_ci				value = *i_src++;
9962306a36Sopenharmony_ci				*i_dst++ = buf_hold | ((value & 0xFF) << 24);
10062306a36Sopenharmony_ci				buf_hold = (value & 0xFFFFFF00) >> 8;
10162306a36Sopenharmony_ci			}
10262306a36Sopenharmony_ci#endif
10362306a36Sopenharmony_ci			/* Realign the source */
10462306a36Sopenharmony_ci			src = (const void *)i_src;
10562306a36Sopenharmony_ci			src -= 3;
10662306a36Sopenharmony_ci			break;
10762306a36Sopenharmony_ci		case 0x2:	/* Unaligned - Off by 2 */
10862306a36Sopenharmony_ci			/* Word align the source */
10962306a36Sopenharmony_ci			i_src = (const void *) ((unsigned)src & ~3);
11062306a36Sopenharmony_ci#ifndef __MICROBLAZEEL__
11162306a36Sopenharmony_ci			/* Load the holding buffer */
11262306a36Sopenharmony_ci			buf_hold = *i_src++ << 16;
11362306a36Sopenharmony_ci
11462306a36Sopenharmony_ci			for (; c >= 4; c -= 4) {
11562306a36Sopenharmony_ci				value = *i_src++;
11662306a36Sopenharmony_ci				*i_dst++ = buf_hold | value >> 16;
11762306a36Sopenharmony_ci				buf_hold = value << 16;
11862306a36Sopenharmony_ci			}
11962306a36Sopenharmony_ci#else
12062306a36Sopenharmony_ci			/* Load the holding buffer */
12162306a36Sopenharmony_ci			buf_hold = (*i_src++ & 0xFFFF0000) >> 16;
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci			for (; c >= 4; c -= 4) {
12462306a36Sopenharmony_ci				value = *i_src++;
12562306a36Sopenharmony_ci				*i_dst++ = buf_hold | ((value & 0xFFFF) << 16);
12662306a36Sopenharmony_ci				buf_hold = (value & 0xFFFF0000) >> 16;
12762306a36Sopenharmony_ci			}
12862306a36Sopenharmony_ci#endif
12962306a36Sopenharmony_ci			/* Realign the source */
13062306a36Sopenharmony_ci			src = (const void *)i_src;
13162306a36Sopenharmony_ci			src -= 2;
13262306a36Sopenharmony_ci			break;
13362306a36Sopenharmony_ci		case 0x3:	/* Unaligned - Off by 3 */
13462306a36Sopenharmony_ci			/* Word align the source */
13562306a36Sopenharmony_ci			i_src = (const void *) ((unsigned)src & ~3);
13662306a36Sopenharmony_ci#ifndef __MICROBLAZEEL__
13762306a36Sopenharmony_ci			/* Load the holding buffer */
13862306a36Sopenharmony_ci			buf_hold = *i_src++ << 24;
13962306a36Sopenharmony_ci
14062306a36Sopenharmony_ci			for (; c >= 4; c -= 4) {
14162306a36Sopenharmony_ci				value = *i_src++;
14262306a36Sopenharmony_ci				*i_dst++ = buf_hold | value >> 8;
14362306a36Sopenharmony_ci				buf_hold = value << 24;
14462306a36Sopenharmony_ci			}
14562306a36Sopenharmony_ci#else
14662306a36Sopenharmony_ci			/* Load the holding buffer */
14762306a36Sopenharmony_ci			buf_hold = (*i_src++ & 0xFF000000) >> 24;
14862306a36Sopenharmony_ci
14962306a36Sopenharmony_ci			for (; c >= 4; c -= 4) {
15062306a36Sopenharmony_ci				value = *i_src++;
15162306a36Sopenharmony_ci				*i_dst++ = buf_hold | ((value & 0xFFFFFF) << 8);
15262306a36Sopenharmony_ci				buf_hold = (value & 0xFF000000) >> 24;
15362306a36Sopenharmony_ci			}
15462306a36Sopenharmony_ci#endif
15562306a36Sopenharmony_ci			/* Realign the source */
15662306a36Sopenharmony_ci			src = (const void *)i_src;
15762306a36Sopenharmony_ci			src -= 1;
15862306a36Sopenharmony_ci			break;
15962306a36Sopenharmony_ci		}
16062306a36Sopenharmony_ci		dst = (void *)i_dst;
16162306a36Sopenharmony_ci	}
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_ci	/* Finish off any remaining bytes */
16462306a36Sopenharmony_ci	/* simple fast copy, ... unless a cache boundary is crossed */
16562306a36Sopenharmony_ci	switch (c) {
16662306a36Sopenharmony_ci	case 3:
16762306a36Sopenharmony_ci		*dst++ = *src++;
16862306a36Sopenharmony_ci		fallthrough;
16962306a36Sopenharmony_ci	case 2:
17062306a36Sopenharmony_ci		*dst++ = *src++;
17162306a36Sopenharmony_ci		fallthrough;
17262306a36Sopenharmony_ci	case 1:
17362306a36Sopenharmony_ci		*dst++ = *src++;
17462306a36Sopenharmony_ci	}
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci	return v_dst;
17762306a36Sopenharmony_ci}
17862306a36Sopenharmony_ciEXPORT_SYMBOL(memcpy);
17962306a36Sopenharmony_ci#endif /* CONFIG_OPT_LIB_FUNCTION */
180