/*
 * This implementation is compiled only when __ARMEL__ is undefined or zero.
 * NOTE(review): presumably builds that define __ARMEL__ get memcpy from
 * another source file -- confirm against the build configuration.
 */
#if !__ARMEL__
#include <string.h>
#include <stdint.h>
#include <endian.h>

/*
 * memcpy - copy n bytes from src to dest.
 *
 * dest: start of the destination region (restrict: must not overlap src).
 * src:  start of the source region.
 * n:    number of bytes to copy; 0 is allowed and copies nothing.
 *
 * Returns dest.
 *
 * With a GNU-compatible compiler the copy is done mostly in aligned 32-bit
 * words; otherwise a plain byte loop is used.
 */
void *memcpy(void *restrict dest, const void *restrict src, size_t n)
{
	unsigned char *d = dest;
	const unsigned char *s = src;

#ifdef __GNUC__

/*
 * LS/RS are the shift operators used to splice two consecutive source words
 * into one destination word; which direction each one is depends on the
 * byte order reported by <endian.h>.
 */
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define LS >>
#define RS <<
#else
#define LS <<
#define RS >>
#endif

	/* may_alias lets these word accesses alias the byte buffers. */
	typedef uint32_t __attribute__((__may_alias__)) u32;
	uint32_t w, x;

	/* Copy single bytes until the source pointer is 4-byte aligned. */
	for (; (uintptr_t)s % 4 && n; n--) *d++ = *s++;

	/* Source and destination are both aligned: straight word copies. */
	if ((uintptr_t)d % 4 == 0) {
		for (; n>=16; s+=16, d+=16, n-=16) {
			*(u32 *)(d+0) = *(const u32 *)(s+0);
			*(u32 *)(d+4) = *(const u32 *)(s+4);
			*(u32 *)(d+8) = *(const u32 *)(s+8);
			*(u32 *)(d+12) = *(const u32 *)(s+12);
		}
		/* n < 16 here; its low four bits select the tail pieces. */
		if (n&8) {
			*(u32 *)(d+0) = *(const u32 *)(s+0);
			*(u32 *)(d+4) = *(const u32 *)(s+4);
			d += 8; s += 8;
		}
		if (n&4) {
			*(u32 *)(d+0) = *(const u32 *)(s+0);
			d += 4; s += 4;
		}
		if (n&2) {
			*d++ = *s++; *d++ = *s++;
		}
		if (n&1) {
			*d = *s;
		}
		return dest;
	}

	/*
	 * Destination is misaligned relative to the (aligned) source.  For
	 * large enough copies, load the aligned word at s into w, copy 1-3
	 * bytes so that d becomes aligned, then build each destination word
	 * from the leftover bytes of the previous source word and the leading
	 * bytes of the next one.  Each loop bound (17/18/19) is the number of
	 * bytes, counted from s, that the last word load of an iteration
	 * reaches, so loads never run past the source region.
	 */
	if (n >= 32) switch ((uintptr_t)d % 4) {
	case 1:
		w = *(const u32 *)s;
		*d++ = *s++;
		*d++ = *s++;
		*d++ = *s++;
		n -= 3;
		for (; n>=17; s+=16, d+=16, n-=16) {
			x = *(const u32 *)(s+1);
			*(u32 *)(d+0) = (w LS 24) | (x RS 8);
			w = *(const u32 *)(s+5);
			*(u32 *)(d+4) = (x LS 24) | (w RS 8);
			x = *(const u32 *)(s+9);
			*(u32 *)(d+8) = (w LS 24) | (x RS 8);
			w = *(const u32 *)(s+13);
			*(u32 *)(d+12) = (x LS 24) | (w RS 8);
		}
		break;
	case 2:
		w = *(const u32 *)s;
		*d++ = *s++;
		*d++ = *s++;
		n -= 2;
		for (; n>=18; s+=16, d+=16, n-=16) {
			x = *(const u32 *)(s+2);
			*(u32 *)(d+0) = (w LS 16) | (x RS 16);
			w = *(const u32 *)(s+6);
			*(u32 *)(d+4) = (x LS 16) | (w RS 16);
			x = *(const u32 *)(s+10);
			*(u32 *)(d+8) = (w LS 16) | (x RS 16);
			w = *(const u32 *)(s+14);
			*(u32 *)(d+12) = (x LS 16) | (w RS 16);
		}
		break;
	case 3:
		w = *(const u32 *)s;
		*d++ = *s++;
		n -= 1;
		for (; n>=19; s+=16, d+=16, n-=16) {
			x = *(const u32 *)(s+3);
			*(u32 *)(d+0) = (w LS 8) | (x RS 24);
			w = *(const u32 *)(s+7);
			*(u32 *)(d+4) = (x LS 8) | (w RS 24);
			x = *(const u32 *)(s+11);
			*(u32 *)(d+8) = (w LS 8) | (x RS 24);
			w = *(const u32 *)(s+15);
			*(u32 *)(d+12) = (x LS 8) | (w RS 24);
		}
		break;
	}

	/* Remaining bytes (fewer than 32) are copied bytewise, by bits of n. */
	if (n&16) {
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
	}
	if (n&8) {
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
	}
	if (n&4) {
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
	}
	if (n&2) {
		*d++ = *s++; *d++ = *s++;
	}
	if (n&1) {
		*d = *s;
	}
	return dest;

/* Keep the operator macros from leaking into the rest of the file. */
#undef LS
#undef RS

#else

	/* Portable fallback: plain byte-at-a-time copy. */
	for (; n; n--) *d++ = *s++;
	return dest;

#endif
}
#endif