/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.

#include <linux/linkage.h>
#include "sysdep.h"

/*
 * memcpy / __memcpy: copy r2 bytes from the buffer at r1 to the buffer at r0.
 *
 * In:   r0 = dest, r1 = src, r2 = len
 * Out:  r0 = original dest (saved in r12 on entry, restored before rts)
 * Registers written: r0-r3, r12, r13, r18, r19, and additionally r20-r23
 *                    when __CK860__ is not defined.
 *
 * Strategy:
 *   1. If len < 4, copy byte by byte.
 *   2. Otherwise copy 1..3 leading bytes until dest is 4-byte aligned and
 *      subtract them from len.
 *   3. Copy 16-byte chunks, then the remaining words, then the remaining
 *      bytes.
 *
 * r2 is not decremented inside the copy loops.  Each loop takes its
 * iteration count from a bit field of r2 instead:
 *   bits 31..4 -> number of 16-byte chunks
 *   bits  3..2 -> number of remaining words
 *   bits  1..0 -> number of trailing bytes
 *
 * Only the alignment of dest is tested; src is loaded with ldw at whatever
 * alignment it has.
 *
 * LABLE_ALIGN, PRE_BNEZAD and BNEZAD are not defined in this file
 * (presumably they come from sysdep.h).  Judging by their use here,
 * PRE_BNEZAD/BNEZAD together decrement the counter register and branch
 * back while it is non-zero -- confirm against their definitions.
 */
ENTRY(__memcpy)
ENTRY(memcpy)
	/* Keep the original dest for the return value.  */
	mov	r12, r0
	/* Test if len less than 4 bytes.  */
	cmplti	r2, 4
	bt	.L_copy_by_byte

	/* r13 = dest & 3, r19 = 4 (used below to compute 4 - r13).  */
	andi	r13, r0, 3
	movi	r19, 4
	/* Test if dest is not 4 bytes aligned.  */
	bnez	r13, .L_dest_not_aligned

/* Hardware can handle unaligned access directly.  */
.L_dest_aligned:
	/* If dest is aligned, then copy.  r18 = len / 16.  */
	zext	r18, r2, 31, 4

	/* Test if len less than 16 bytes.  */
	bez	r18, .L_len_less_16bytes
	/* NOTE(review): r19 is not read again on this path.  */
	movi	r19, 0

	LABLE_ALIGN
/* Copy 16 bytes per iteration; r18 counts the remaining chunks.  */
.L_len_larger_16bytes:
#if defined(__CK860__)
	/*
	 * One scratch register, loads and stores interleaved.  There is no
	 * PRE_BNEZAD in this variant; presumably BNEZAD does the decrement
	 * itself on this core -- verify in sysdep.h.
	 */
	ldw	r3, (r1, 0)
	stw	r3, (r0, 0)
	ldw	r3, (r1, 4)
	stw	r3, (r0, 4)
	ldw	r3, (r1, 8)
	stw	r3, (r0, 8)
	ldw	r3, (r1, 12)
	addi	r1, 16
	stw	r3, (r0, 12)
	addi	r0, 16
#else
	/* Four loads into r20-r23 followed by four stores.  */
	ldw	r20, (r1, 0)
	ldw	r21, (r1, 4)
	ldw	r22, (r1, 8)
	ldw	r23, (r1, 12)
	stw	r20, (r0, 0)
	stw	r21, (r0, 4)
	stw	r22, (r0, 8)
	stw	r23, (r0, 12)
	PRE_BNEZAD (r18)
	addi	r1, 16
	addi	r0, 16
#endif
	BNEZAD (r18, .L_len_larger_16bytes)

/* Copy the remaining whole words: r18 = (len >> 2) & 3.  */
.L_len_less_16bytes:
	zext	r18, r2, 3, 2
	bez	r18, .L_copy_by_byte
.L_len_less_16bytes_loop:
	ldw	r3, (r1, 0)
	PRE_BNEZAD (r18)
	addi	r1, 4
	stw	r3, (r0, 0)
	addi	r0, 4
	BNEZAD (r18, .L_len_less_16bytes_loop)

/* Copy the trailing bytes, if any: r18 = len & 3.  */
.L_copy_by_byte:
	zext	r18, r2, 1, 0
	bez	r18, .L_return
.L_copy_by_byte_loop:
	ldb	r3, (r1, 0)
	PRE_BNEZAD (r18)
	addi	r1, 1
	stb	r3, (r0, 0)
	addi	r0, 1
	BNEZAD (r18, .L_copy_by_byte_loop)

.L_return:
	/* Return the original dest pointer.  */
	mov	r0, r12
	rts

/*
 * If dest is not aligned, just copying some bytes makes the
 * dest align.
 */
.L_dest_not_aligned:
	/* r13 = 4 - (dest & 3): bytes needed to reach alignment.  */
	sub	r13, r19, r13
	/* Those bytes no longer count towards the remaining length.  */
	sub	r2, r13

/* Makes the dest align.  */
.L_dest_not_aligned_loop:
	ldb	r3, (r1, 0)
	PRE_BNEZAD (r13)
	addi	r1, 1
	stb	r3, (r0, 0)
	addi	r0, 1
	BNEZAD (r13, .L_dest_not_aligned_loop)
	/* Fewer than 4 bytes left: finish with the byte loop.  */
	cmplti	r2, 4
	bt	.L_copy_by_byte

	/*
	 * Dest is now aligned and at least 4 bytes remain: continue on the
	 * word copy path.  The alignment of src is not checked.
	 */
	jbr	.L_dest_aligned
ENDPROC(__memcpy)