/* SPDX-License-Identifier: GPL-2.0 */
/*
 * arch/alpha/lib/memmove.S
 *
 * Barely optimized memmove routine for Alpha EV5.
 *
 * This is hand-massaged output from the original memcpy.c.  We defer to
 * memcpy whenever possible; the backwards copy loops are not unrolled.
 */
#include <asm/export.h>
	.set noat			/* assembler may not use $at behind our back */
	.set noreorder			/* keep the instruction order exactly as written */
	.text

/*
 * memmove(dest, src, n)
 *
 * In:	$16 = dest, $17 = src, $18 = n (byte count)
 *	$26 = return address, $27 = procedure value (consumed by ldgp)
 * Out:	$0  = dest
 * Scratch used here: $1-$6.  $18 is counted down as bytes are copied;
 * $16 and $17 are only read by the code in this file.
 *
 * Strategy:
 *   1. If the two regions do not overlap (dest + n <= src, or
 *      src + n <= dest) branch straight to memcpy.
 *   2. Otherwise pick the direction that never overwrites source bytes
 *      that are still to be read: ascending when dest <= src
 *      ($memmove_up), descending from the end when dest > src.
 *   3. In either direction, when src and dest have the same offset
 *      within a quadword: copy single bytes until the destination
 *      pointer is quadword aligned, then whole quadwords, then the
 *      remaining 0-7 bytes.  When the offsets differ, copy one byte at
 *      a time for the whole length.
 *
 * Single bytes are moved with a read-modify-write of the enclosing
 * quadword (ldq_u / extbl / insbl / mskbl / stq_u); no byte load/store
 * instructions are used.
 *
 * The nop/unop/fnop instructions are fillers.  The code is laid out in
 * groups of four instructions (presumably to match EV5 issue grouping),
 * so do not remove or reorder them without re-checking the schedule.
 */
	.align 4
	.globl memmove
	.ent memmove
memmove:
	ldgp $29, 0($27)		/* establish the global pointer */
	unop
	nop
	.prologue 1			/* 1: this function sets up gp */

	addq $16,$18,$4			/* $4 = dest + n (one past dest end) */
	addq $17,$18,$5			/* $5 = src + n  (one past src end) */
	cmpule $4,$17,$1		/*  dest + n <= src  */
	cmpule $5,$16,$2		/*  dest >= src + n  */

	bis $1,$2,$1			/* $1 != 0 if the regions are disjoint */
	mov $16,$0			/* return value = dest */
	xor $16,$17,$2			/* low bits compared below for co-alignment */
	bne $1,memcpy			!samegp

	and $2,7,$2			/* Test for src/dest co-alignment.  */
	and $16,7,$1			/* $1 = dest & 7 (used by the up path) */
	cmpule $16,$17,$3
	bne $3,$memmove_up		/* dest <= src */

	/*
	 * Descending copy (dest > src).  $4/$5 point one past the last
	 * destination/source byte and walk down towards dest/src.
	 */
	and $4,7,$1			/* $1 = (dest + n) & 7 */
	bne $2,$misaligned_dn		/* different offsets: bytes only */
	unop
	beq $1,$skip_aligned_byte_loop_head_dn	/* end already aligned */

	/* Copy bytes downwards until the dest pointer is quadword aligned. */
$aligned_byte_loop_head_dn:
	lda $4,-1($4)			/* step both pointers to the next byte */
	lda $5,-1($5)
	unop
	ble $18,$egress			/* ran out of bytes */

	ldq_u $3,0($5)			/* quadword holding the source byte */
	ldq_u $2,0($4)			/* quadword holding the dest byte */
	lda $18,-1($18)			/* n-- */
	extbl $3,$5,$1			/* $1 = source byte */

	insbl $1,$4,$1			/* move it into dest's byte lane */
	mskbl $2,$4,$2			/* clear that lane in the dest quadword */
	bis $1,$2,$1			/* merge */
	and $4,7,$6			/* $6 == 0 once dest is aligned */

	stq_u $1,0($4)
	bne $6,$aligned_byte_loop_head_dn

	/*
	 * Both pointers are now quadword aligned (they are co-aligned).
	 * n is biased by -8 so that its sign is the loop condition.
	 */
$skip_aligned_byte_loop_head_dn:
	lda $18,-8($18)
	blt $18,$skip_aligned_word_loop_dn	/* fewer than 8 bytes left */

$aligned_word_loop_dn:
	ldq $1,-8($5)			/* load the quadword below src */
	nop
	lda $5,-8($5)
	lda $18,-8($18)

	stq $1,-8($4)			/* store it below dest */
	nop
	lda $4,-8($4)
	bge $18,$aligned_word_loop_dn

$skip_aligned_word_loop_dn:
	lda $18,8($18)			/* undo the bias: 0-7 bytes remain */
	bgt $18,$byte_loop_tail_dn
	unop
	ret $31,($26),1

	.align 4
$misaligned_dn:
	nop
	fnop
	unop
	beq $18,$egress			/* nothing to copy */

	/* Copy the remaining n bytes downwards, one at a time. */
$byte_loop_tail_dn:
	ldq_u $3,-1($5)			/* quadword holding the next source byte */
	ldq_u $2,-1($4)			/* quadword holding the next dest byte */
	lda $5,-1($5)
	lda $4,-1($4)

	lda $18,-1($18)			/* n-- */
	extbl $3,$5,$1			/* $1 = source byte */
	insbl $1,$4,$1			/* move it into dest's byte lane */
	mskbl $2,$4,$2			/* clear that lane in the dest quadword */

	bis $1,$2,$1			/* merge */
	stq_u $1,0($4)
	bgt $18,$byte_loop_tail_dn
	br $egress

	/*
	 * Ascending copy (dest <= src).  On entry $1 = dest & 7 and
	 * $2 = (dest ^ src) & 7, both computed at the top of memmove.
	 */
$memmove_up:
	mov $16,$4			/* $4 = running dest pointer */
	mov $17,$5			/* $5 = running src pointer */
	bne $2,$misaligned_up		/* different offsets: bytes only */
	beq $1,$skip_aligned_byte_loop_head_up	/* dest already aligned */

	/* Copy bytes upwards until the dest pointer is quadword aligned. */
$aligned_byte_loop_head_up:
	unop
	ble $18,$egress			/* ran out of bytes */
	ldq_u $3,0($5)			/* quadword holding the source byte */
	ldq_u $2,0($4)			/* quadword holding the dest byte */

	lda $18,-1($18)			/* n-- */
	extbl $3,$5,$1			/* $1 = source byte */
	insbl $1,$4,$1			/* move it into dest's byte lane */
	mskbl $2,$4,$2			/* clear that lane in the dest quadword */

	bis $1,$2,$1			/* merge */
	lda $5,1($5)
	stq_u $1,0($4)
	lda $4,1($4)

	and $4,7,$6			/* $6 == 0 once dest is aligned */
	bne $6,$aligned_byte_loop_head_up

	/*
	 * Both pointers are now quadword aligned (they are co-aligned).
	 * n is biased by -8 so that its sign is the loop condition.
	 */
$skip_aligned_byte_loop_head_up:
	lda $18,-8($18)
	blt $18,$skip_aligned_word_loop_up	/* fewer than 8 bytes left */

$aligned_word_loop_up:
	ldq $1,0($5)
	nop
	lda $5,8($5)
	lda $18,-8($18)

	stq $1,0($4)
	nop
	lda $4,8($4)
	bge $18,$aligned_word_loop_up

$skip_aligned_word_loop_up:
	lda $18,8($18)			/* undo the bias: 0-7 bytes remain */
	bgt $18,$byte_loop_tail_up
	unop
	ret $31,($26),1

	.align 4
$misaligned_up:
	nop
	fnop
	unop
	beq $18,$egress			/* nothing to copy */

	/* Copy the remaining n bytes upwards, one at a time. */
$byte_loop_tail_up:
	ldq_u $3,0($5)			/* quadword holding the source byte */
	ldq_u $2,0($4)			/* quadword holding the dest byte */
	lda $18,-1($18)			/* n-- */
	extbl $3,$5,$1			/* $1 = source byte */

	insbl $1,$4,$1			/* move it into dest's byte lane */
	mskbl $2,$4,$2			/* clear that lane in the dest quadword */
	bis $1,$2,$1			/* merge */
	stq_u $1,0($4)

	lda $5,1($5)
	lda $4,1($4)
	nop
	bgt $18,$byte_loop_tail_up

	/* Common exit; $0 already holds dest. */
$egress:
	ret $31,($26),1
	nop
	nop
	nop

	.end memmove
	EXPORT_SYMBOL(memmove)