162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-only */
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
462306a36Sopenharmony_ci */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include <linux/linkage.h>
762306a36Sopenharmony_ci
;;; Endian-dependent helpers used by the unaligned-source paths of memcpy.
;;; Each macro expands to a single instruction (or to nothing).
;;;
;;;   SHIFT_1 / SHIFT_2   - a pair of opposite-direction shifts: asl/lsr on
;;;                         little endian, lsr/asl on big endian. The copy
;;;                         loops use SHIFT_1 to position the part of a newly
;;;                         loaded word that completes the pending output word
;;;                         and SHIFT_2 to keep the part left for the next one.
;;;   MERGE_1 / MERGE_2   - used once, in the off-by-1 case, to place the first
;;;                         halfword and the first byte so they can be or-ed
;;;                         into one register. MERGE_2 is empty on little
;;;                         endian, where the byte is already in place.
;;;   EXTRACT_1/EXTRACT_2 - used after the off-by-1 loop to obtain the pending
;;;                         halfword and the pending byte for the final stores.
;;;                         Note that IMM is ignored by the little endian
;;;                         EXTRACT_1 (fixed mask 0xFFFF) and by the big endian
;;;                         EXTRACT_2 (fixed shift of 8).
862306a36Sopenharmony_ci#ifdef __LITTLE_ENDIAN__
962306a36Sopenharmony_ci# define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
1062306a36Sopenharmony_ci# define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
1162306a36Sopenharmony_ci# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM
1262306a36Sopenharmony_ci# define MERGE_2(RX,RY,IMM)
1362306a36Sopenharmony_ci# define EXTRACT_1(RX,RY,IMM)	and	RX, RY, 0xFFFF
1462306a36Sopenharmony_ci# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, IMM
1562306a36Sopenharmony_ci#else
1662306a36Sopenharmony_ci# define SHIFT_1(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
1762306a36Sopenharmony_ci# define SHIFT_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
1862306a36Sopenharmony_ci# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
1962306a36Sopenharmony_ci# define MERGE_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
2062306a36Sopenharmony_ci# define EXTRACT_1(RX,RY,IMM)	lsr	RX, RY, IMM
2162306a36Sopenharmony_ci# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, 0x08
2262306a36Sopenharmony_ci#endif
2362306a36Sopenharmony_ci
;;; Access width of the aligned bulk-copy loop (CASE 0 of memcpy).
;;;
;;;   LOADX / STOREX - post-incrementing load/store. With CONFIG_ARC_HAS_LL64
;;;                    they are ldd/std stepping the pointer by 8 bytes,
;;;                    otherwise ld/st stepping it by 4 bytes.
;;;   ZOLSHFT        - shift that turns a byte count into iterations of the
;;;                    bulk loop, which issues four LOADX and four STOREX per
;;;                    iteration: 32 bytes (shift 5) or 16 bytes (shift 4).
;;;   ZOLAND         - matching mask giving the bytes left over after the bulk
;;;                    loop (0x1F or 0xF).
2462306a36Sopenharmony_ci#ifdef CONFIG_ARC_HAS_LL64
2562306a36Sopenharmony_ci# define LOADX(DST,RX)		ldd.ab	DST, [RX, 8]
2662306a36Sopenharmony_ci# define STOREX(SRC,RX)		std.ab	SRC, [RX, 8]
2762306a36Sopenharmony_ci# define ZOLSHFT		5
2862306a36Sopenharmony_ci# define ZOLAND			0x1F
2962306a36Sopenharmony_ci#else
3062306a36Sopenharmony_ci# define LOADX(DST,RX)		ld.ab	DST, [RX, 4]
3162306a36Sopenharmony_ci# define STOREX(SRC,RX)		st.ab	SRC, [RX, 4]
3262306a36Sopenharmony_ci# define ZOLSHFT		4
3362306a36Sopenharmony_ci# define ZOLAND			0xF
3462306a36Sopenharmony_ci#endif
3562306a36Sopenharmony_ci
;;; memcpy: copy r2 bytes from the address in r1 to the address in r0.
;;;
;;; Register use, as established by the code below:
;;;   r0       - destination; only read here, so it is unchanged on return
;;;   r1       - source pointer, post-incremented by every load
;;;   r2       - number of bytes not yet accounted for
;;;   r3       - working copy of the destination, post-incremented by stores
;;;   r4       - alignment (address & 3) of dst, then of src; reused as a data
;;;              register in the aligned bulk loop
;;;   r5       - source bytes already loaded but not yet stored (unaligned
;;;              cases); plain data register elsewhere
;;;   r6-r10   - data in flight (NOTE(review): with the 64-bit ldd/std variant
;;;              each LOADX presumably fills an even/odd register pair, i.e.
;;;              r4-r11 are all written -- confirm against the ISA manual)
;;;   lp_count - trip count of the lpnz hardware loops
;;;
;;; Strategy: sizes of 8 bytes or less are copied bytewise. Otherwise the
;;; destination is first brought to 4-byte alignment bytewise, then one of
;;; four paths is chosen from the source alignment (CASE 0..3). In the
;;; unaligned cases 1 to 3 leading source bytes are read so that the source
;;; becomes word aligned; they are kept in r5 and every following source word
;;; is split with SHIFT_1 / SHIFT_2 and recombined before being stored.
;;;
;;; Instructions following a .d branch or jump are delay slots and run
;;; whether or not the branch is taken; the code relies on this repeatedly.
3662306a36Sopenharmony_ciENTRY_CFI(memcpy)
3762306a36Sopenharmony_ci	mov.f	0, r2		; flags only: Z is set when size == 0
3862306a36Sopenharmony_ci;;; if size is zero
3962306a36Sopenharmony_ci	jz.d	[blink]		; return right away for an empty copy
4062306a36Sopenharmony_ci	mov	r3, r0		; delay slot: work on r3 so that r0 (ret val) is not clobbered
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_ci;;; if size <= 8
4362306a36Sopenharmony_ci	cmp	r2, 8
4462306a36Sopenharmony_ci	bls.d	@.Lsmallchunk	; unsigned lower-or-same: plain byte loop
4562306a36Sopenharmony_ci	mov.f	lp_count, r2	; delay slot: byte loop count = size (non-zero here)
4662306a36Sopenharmony_ci
;;; Align the destination: copy 4 - (dst & 3) bytes one at a time.
;;; The loop is skipped (Z set by and.f) when dst is already aligned.
4762306a36Sopenharmony_ci	and.f	r4, r0, 0x03
4862306a36Sopenharmony_ci	rsub	lp_count, r4, 4	; lp_count = 4 - r4
4962306a36Sopenharmony_ci	lpnz	@.Laligndestination
5062306a36Sopenharmony_ci	;; LOOP BEGIN
5162306a36Sopenharmony_ci	ldb.ab	r5, [r1,1]
5262306a36Sopenharmony_ci	sub	r2, r2, 1
5362306a36Sopenharmony_ci	stb.ab	r5, [r3,1]
5462306a36Sopenharmony_ci.Laligndestination:
5562306a36Sopenharmony_ci
5662306a36Sopenharmony_ci;;; Check the alignment of the source
5762306a36Sopenharmony_ci	and.f	r4, r1, 0x03	; r4 = src & 3, kept for the dispatch at .Lsourceunaligned
;;; NOTE: the lsr.f further down is the delay slot of this branch, so it also
;;; runs when the branch is taken. The unaligned paths set the flags and
;;; lp_count again before using them.
5862306a36Sopenharmony_ci	bnz.d	@.Lsourceunaligned
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ci;;; CASE 0: Both source and destination are 32bit aligned
6162306a36Sopenharmony_ci;;; Loop count = len / bytes per iteration; the loop is unfolded x4
6262306a36Sopenharmony_ci	lsr.f	lp_count, r2, ZOLSHFT
6362306a36Sopenharmony_ci	lpnz	@.Lcopy32_64bytes
6462306a36Sopenharmony_ci	;; LOOP START
6562306a36Sopenharmony_ci	LOADX (r6, r1)
6662306a36Sopenharmony_ci	LOADX (r8, r1)
6762306a36Sopenharmony_ci	LOADX (r10, r1)
6862306a36Sopenharmony_ci	LOADX (r4, r1)
6962306a36Sopenharmony_ci	STOREX (r6, r3)
7062306a36Sopenharmony_ci	STOREX (r8, r3)
7162306a36Sopenharmony_ci	STOREX (r10, r3)
7262306a36Sopenharmony_ci	STOREX (r4, r3)
7362306a36Sopenharmony_ci.Lcopy32_64bytes:
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ci	and.f	lp_count, r2, ZOLAND ;Bytes the bulk loop did not cover (at most 31 with LL64, 15 without)
;;; Reached either by falling through or from the size <= 8 test. In both
;;; cases lp_count holds the number of bytes to copy and Z tells whether
;;; it is zero.
7662306a36Sopenharmony_ci.Lsmallchunk:
7762306a36Sopenharmony_ci	lpnz	@.Lcopyremainingbytes
7862306a36Sopenharmony_ci	;; LOOP START
7962306a36Sopenharmony_ci	ldb.ab	r5, [r1,1]
8062306a36Sopenharmony_ci	stb.ab	r5, [r3,1]
8162306a36Sopenharmony_ci.Lcopyremainingbytes:
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_ci	j	[blink]
8462306a36Sopenharmony_ci;;; END CASE 0
8562306a36Sopenharmony_ci
;;; Dispatch on r4 = src & 3 (1, 2 or 3 here): 2 -> CASE 2, 3 -> CASE 3,
;;; 1 falls through to CASE 1. Both branches test the flags of the single
;;; cmp; the sub and the ldb in between do not modify them.
8662306a36Sopenharmony_ci.Lsourceunaligned:
8762306a36Sopenharmony_ci	cmp	r4, 2
8862306a36Sopenharmony_ci	beq.d	@.LunalignedOffby2
8962306a36Sopenharmony_ci	sub	r2, r2, 1	; delay slot: runs for all three cases
9062306a36Sopenharmony_ci
9162306a36Sopenharmony_ci	bhi.d	@.LunalignedOffby3
9262306a36Sopenharmony_ci	ldb.ab	r5, [r1, 1]	; delay slot: first source byte, for CASE 1 and CASE 3
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_ci;;; CASE 1: The source is unaligned, off by 1
9562306a36Sopenharmony_ci	;; One byte (read above) gives 16bit alignment,
9662306a36Sopenharmony_ci	;; two more bytes give 32bit alignment
9762306a36Sopenharmony_ci	ldh.ab	r6, [r1, 2]
9862306a36Sopenharmony_ci	sub	r2, r2, 2	; r2 is now reduced by the 3 bytes held in registers
9962306a36Sopenharmony_ci	;; Convert to words, unfold x2
10062306a36Sopenharmony_ci	lsr.f	lp_count, r2, 3
	;; Combine the byte and the halfword: r5 = the 3 pending source bytes
10162306a36Sopenharmony_ci	MERGE_1 (r6, r6, 8)
10262306a36Sopenharmony_ci	MERGE_2 (r5, r5, 24)
10362306a36Sopenharmony_ci	or	r5, r5, r6
10462306a36Sopenharmony_ci
10562306a36Sopenharmony_ci	;; Both src and dst are aligned
10662306a36Sopenharmony_ci	lpnz	@.Lcopy8bytes_1
10762306a36Sopenharmony_ci	;; LOOP START
10862306a36Sopenharmony_ci	ld.ab	r6, [r1, 4]
10962306a36Sopenharmony_ci	ld.ab	r8, [r1,4]
11062306a36Sopenharmony_ci
	;; output word = 3 pending bytes + 1 byte of the new word;
	;; the other 3 bytes of the new word become the pending bytes
11162306a36Sopenharmony_ci	SHIFT_1	(r7, r6, 24)
11262306a36Sopenharmony_ci	or	r7, r7, r5
11362306a36Sopenharmony_ci	SHIFT_2	(r5, r6, 8)
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci	SHIFT_1	(r9, r8, 24)
11662306a36Sopenharmony_ci	or	r9, r9, r5
11762306a36Sopenharmony_ci	SHIFT_2	(r5, r8, 8)
11862306a36Sopenharmony_ci
11962306a36Sopenharmony_ci	st.ab	r7, [r3, 4]
12062306a36Sopenharmony_ci	st.ab	r9, [r3, 4]
12162306a36Sopenharmony_ci.Lcopy8bytes_1:
12262306a36Sopenharmony_ci
	;; r5 still holds 3 loaded but unstored bytes; flush them
12362306a36Sopenharmony_ci	;; Write back the remaining 16bits
12462306a36Sopenharmony_ci	EXTRACT_1 (r6, r5, 16)
12562306a36Sopenharmony_ci	sth.ab	r6, [r3, 2]
12662306a36Sopenharmony_ci	;; Write back the remaining 8bits
12762306a36Sopenharmony_ci	EXTRACT_2 (r5, r5, 16)
12862306a36Sopenharmony_ci	stb.ab	r5, [r3, 1]
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci	and.f	lp_count, r2, 0x07 ;Bytes the 8-byte loop did not cover (at most 7)
13162306a36Sopenharmony_ci	lpnz	@.Lcopybytewise_1
13262306a36Sopenharmony_ci	;; LOOP START
13362306a36Sopenharmony_ci	ldb.ab	r6, [r1,1]
13462306a36Sopenharmony_ci	stb.ab	r6, [r3,1]
13562306a36Sopenharmony_ci.Lcopybytewise_1:
13662306a36Sopenharmony_ci	j	[blink]
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci.LunalignedOffby2:
13962306a36Sopenharmony_ci;;; CASE 2: The source is unaligned, off by 2
14062306a36Sopenharmony_ci	ldh.ab	r5, [r1, 2]	; r5 = the 2 pending source bytes
14162306a36Sopenharmony_ci	sub	r2, r2, 1	; together with the sub in the dispatch: r2 -= 2
14262306a36Sopenharmony_ci
14362306a36Sopenharmony_ci	;; Both src and dst are aligned
14462306a36Sopenharmony_ci	;; Convert to words, unfold x2
14562306a36Sopenharmony_ci	lsr.f	lp_count, r2, 3
	;; Big endian only, and only when the loop will run (flags from the
	;; lsr.f above): move the pending halfword to the upper half of r5
14662306a36Sopenharmony_ci#ifdef __BIG_ENDIAN__
14762306a36Sopenharmony_ci	asl.nz	r5, r5, 16
14862306a36Sopenharmony_ci#endif
14962306a36Sopenharmony_ci	lpnz	@.Lcopy8bytes_2
15062306a36Sopenharmony_ci	;; LOOP START
15162306a36Sopenharmony_ci	ld.ab	r6, [r1, 4]
15262306a36Sopenharmony_ci	ld.ab	r8, [r1,4]
15362306a36Sopenharmony_ci
	;; output word = 2 pending bytes + 2 bytes of the new word
15462306a36Sopenharmony_ci	SHIFT_1	(r7, r6, 16)
15562306a36Sopenharmony_ci	or	r7, r7, r5
15662306a36Sopenharmony_ci	SHIFT_2	(r5, r6, 16)
15762306a36Sopenharmony_ci
15862306a36Sopenharmony_ci	SHIFT_1	(r9, r8, 16)
15962306a36Sopenharmony_ci	or	r9, r9, r5
16062306a36Sopenharmony_ci	SHIFT_2	(r5, r8, 16)
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_ci	st.ab	r7, [r3, 4]
16362306a36Sopenharmony_ci	st.ab	r9, [r3, 4]
16462306a36Sopenharmony_ci.Lcopy8bytes_2:
16562306a36Sopenharmony_ci
	;; Nothing since the lsr.f has set flags, so .nz still means that
	;; the loop ran: bring the pending halfword back to the lower half
16662306a36Sopenharmony_ci#ifdef __BIG_ENDIAN__
16762306a36Sopenharmony_ci	lsr.nz	r5, r5, 16
16862306a36Sopenharmony_ci#endif
16962306a36Sopenharmony_ci	sth.ab	r5, [r3, 2]	; flush the 2 pending bytes
17062306a36Sopenharmony_ci
17162306a36Sopenharmony_ci	and.f	lp_count, r2, 0x07 ;Bytes the 8-byte loop did not cover (at most 7)
17262306a36Sopenharmony_ci	lpnz	@.Lcopybytewise_2
17362306a36Sopenharmony_ci	;; LOOP START
17462306a36Sopenharmony_ci	ldb.ab	r6, [r1,1]
17562306a36Sopenharmony_ci	stb.ab	r6, [r3,1]
17662306a36Sopenharmony_ci.Lcopybytewise_2:
17762306a36Sopenharmony_ci	j	[blink]
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_ci.LunalignedOffby3:
18062306a36Sopenharmony_ci;;; CASE 3: The source is unaligned, off by 3
18162306a36Sopenharmony_ci;;; Hence a single byte (already read into r5 by the dispatch) gives 32bit alignment
18262306a36Sopenharmony_ci
18362306a36Sopenharmony_ci	;; Both src and dst are aligned
18462306a36Sopenharmony_ci	;; Convert to words, unfold x2
18562306a36Sopenharmony_ci	lsr.f	lp_count, r2, 3
	;; Big endian only, and only when the loop will run: move the
	;; pending byte to the top byte of r5
18662306a36Sopenharmony_ci#ifdef __BIG_ENDIAN__
18762306a36Sopenharmony_ci	asl.ne	r5, r5, 24
18862306a36Sopenharmony_ci#endif
18962306a36Sopenharmony_ci	lpnz	@.Lcopy8bytes_3
19062306a36Sopenharmony_ci	;; LOOP START
19162306a36Sopenharmony_ci	ld.ab	r6, [r1, 4]
19262306a36Sopenharmony_ci	ld.ab	r8, [r1,4]
19362306a36Sopenharmony_ci
	;; output word = 1 pending byte + 3 bytes of the new word
19462306a36Sopenharmony_ci	SHIFT_1	(r7, r6, 8)
19562306a36Sopenharmony_ci	or	r7, r7, r5
19662306a36Sopenharmony_ci	SHIFT_2	(r5, r6, 24)
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci	SHIFT_1	(r9, r8, 8)
19962306a36Sopenharmony_ci	or	r9, r9, r5
20062306a36Sopenharmony_ci	SHIFT_2	(r5, r8, 24)
20162306a36Sopenharmony_ci
20262306a36Sopenharmony_ci	st.ab	r7, [r3, 4]
20362306a36Sopenharmony_ci	st.ab	r9, [r3, 4]
20462306a36Sopenharmony_ci.Lcopy8bytes_3:
20562306a36Sopenharmony_ci
	;; Flags are still those of the lsr.f: undo the shift if the loop ran
20662306a36Sopenharmony_ci#ifdef __BIG_ENDIAN__
20762306a36Sopenharmony_ci	lsr.nz	r5, r5, 24
20862306a36Sopenharmony_ci#endif
20962306a36Sopenharmony_ci	stb.ab	r5, [r3, 1]	; flush the pending byte
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_ci	and.f	lp_count, r2, 0x07 ;Bytes the 8-byte loop did not cover (at most 7)
21262306a36Sopenharmony_ci	lpnz	@.Lcopybytewise_3
21362306a36Sopenharmony_ci	;; LOOP START
21462306a36Sopenharmony_ci	ldb.ab	r6, [r1,1]
21562306a36Sopenharmony_ci	stb.ab	r6, [r3,1]
21662306a36Sopenharmony_ci.Lcopybytewise_3:
21762306a36Sopenharmony_ci	j	[blink]
21862306a36Sopenharmony_ci
21962306a36Sopenharmony_ciEND_CFI(memcpy)
220