/* SPDX-License-Identifier: GPL-2.0 */
/*
 * arch/alpha/lib/memmove.S
 *
 * Barely optimized memmove routine for Alpha EV5.
 *
 * This is hand-massaged output from the original memcpy.c.  We defer to
 * memcpy whenever possible; the backwards copy loops are not unrolled.
 *
 * Register use, as established by the code below:
 *	$16		dest (incoming argument, never written here)
 *	$17		src  (incoming argument, never written here)
 *	$18		n, the number of bytes still to be copied
 *	$0		return value; set to dest before any copying starts
 *	$4		dest cursor
 *	$5		src cursor
 *	$1,$2,$3,$6	scratch
 *	$26		return address (never written here)
 *
 * Strategy:
 *   1. If the regions do not overlap (dest + n <= src or src + n <= dest),
 *      branch straight to memcpy.
 *   2. Otherwise copy forward ("_up" labels) when dest <= src, and
 *      backward from the end of the buffers ("_dn" labels) when dest > src.
 *   3. In either direction: if src and dest differ in their low three
 *      address bits, copy one byte at a time.  If they agree, copy single
 *      bytes until the dest cursor is 8-byte aligned, then whole
 *      quadwords, then the remaining 0..7 bytes one at a time.
 *
 * A single byte is copied by a read-modify-write of the enclosing aligned
 * quadword: ldq_u both quadwords, extbl the source byte, insbl it into
 * the destination byte lane, mskbl that lane out of the old destination
 * quadword, bis the two together and stq_u the result.
 *
 * NOTE(review): the nop/unop/fnop fillers and the grouping of
 * instructions in fours are presumably there for EV5 issue-slot
 * alignment; confirm before removing or reordering anything.
 */
#include <asm/export.h>
	.set noat
	.set noreorder
	.text

	.align 4
	.globl memmove
	.ent memmove
memmove:
	ldgp $29, 0($27)
	unop
	nop
	.prologue 1

	addq $16,$18,$4			/* $4 = dest + n */
	addq $17,$18,$5			/* $5 = src + n */
	cmpule $4,$17,$1		/* dest + n <= src */
	cmpule $5,$16,$2		/* dest >= src + n */

	bis $1,$2,$1			/* $1 != 0: regions do not overlap */
	mov $16,$0			/* return value = dest */
	xor $16,$17,$2			/* start of the co-alignment test */
	/*
	 * No overlap: let memcpy do the work.  $16/$17/$18 and $26 are
	 * still untouched, so memcpy sees the original arguments and
	 * returns directly to our caller.  !samegp marks the target as
	 * sharing our GP (see the GNU as Alpha relocation documentation).
	 */
	bne $1,memcpy			!samegp

	and $2,7,$2			/* Test for src/dest co-alignment.  */
	and $16,7,$1			/* $1 = dest & 7, used by the up path */
	cmpule $16,$17,$3
	bne $3,$memmove_up		/* dest <= src: copy forward */

	/*
	 * Backward copy: dest > src and the regions overlap.
	 * $4 and $5 point one past the end of dest and src.
	 */
	and $4,7,$1			/* $1 = (dest + n) & 7 */
	bne $2,$misaligned_dn		/* not co-aligned: bytes only */
	unop
	beq $1,$skip_aligned_byte_loop_head_dn	/* end already aligned */

	/* Copy single bytes backward until the dest cursor is 8-byte aligned. */
$aligned_byte_loop_head_dn:
	lda $4,-1($4)			/* step both cursors back one byte */
	lda $5,-1($5)
	unop
	ble $18,$egress			/* nothing left to copy */

	ldq_u $3,0($5)			/* quadword containing the src byte */
	ldq_u $2,0($4)			/* quadword containing the dest byte */
	lda $18,-1($18)			/* n-- */
	extbl $3,$5,$1			/* $1 = src byte */

	insbl $1,$4,$1			/* move it to the dest byte lane */
	mskbl $2,$4,$2			/* clear that lane in the dest quad */
	bis $1,$2,$1			/* merge */
	and $4,7,$6			/* $6 == 0 once dest cursor is aligned */

	stq_u $1,0($4)
	bne $6,$aligned_byte_loop_head_dn

	/*
	 * Both cursors are now 8-byte aligned (they are co-aligned on this
	 * path).  $18 is biased by -8 while in the word loop, so that
	 * "$18 >= 0" means "at least eight bytes remain".
	 */
$skip_aligned_byte_loop_head_dn:
	lda $18,-8($18)
	blt $18,$skip_aligned_word_loop_dn	/* fewer than 8 bytes left */

$aligned_word_loop_dn:
	ldq $1,-8($5)			/* load the quadword below src cursor */
	nop
	lda $5,-8($5)
	lda $18,-8($18)

	stq $1,-8($4)			/* store it below the dest cursor */
	nop
	lda $4,-8($4)
	bge $18,$aligned_word_loop_dn

$skip_aligned_word_loop_dn:
	lda $18,8($18)			/* undo the bias: $18 = bytes left */
	bgt $18,$byte_loop_tail_dn	/* 1..7 trailing bytes remain */
	unop
	ret $31,($26),1

	.align 4
$misaligned_dn:
	nop
	fnop
	unop
	beq $18,$egress			/* n == 0: nothing to do */

	/* Copy the remaining bytes backward, one per iteration. */
$byte_loop_tail_dn:
	ldq_u $3,-1($5)			/* quadword containing the src byte */
	ldq_u $2,-1($4)			/* quadword containing the dest byte */
	lda $5,-1($5)			/* step both cursors back one byte */
	lda $4,-1($4)

	lda $18,-1($18)			/* n-- */
	extbl $3,$5,$1			/* $1 = src byte */
	insbl $1,$4,$1			/* move it to the dest byte lane */
	mskbl $2,$4,$2			/* clear that lane in the dest quad */

	bis $1,$2,$1			/* merge */
	stq_u $1,0($4)
	bgt $18,$byte_loop_tail_dn
	br $egress

	/*
	 * Forward copy: dest <= src and the regions overlap.
	 * On entry $1 = dest & 7 and $2 = (dest ^ src) & 7.
	 */
$memmove_up:
	mov $16,$4			/* dest cursor = dest */
	mov $17,$5			/* src cursor = src */
	bne $2,$misaligned_up		/* not co-aligned: bytes only */
	beq $1,$skip_aligned_byte_loop_head_up	/* dest already aligned */

	/* Copy single bytes forward until the dest cursor is 8-byte aligned. */
$aligned_byte_loop_head_up:
	unop
	ble $18,$egress			/* nothing left to copy */
	ldq_u $3,0($5)			/* quadword containing the src byte */
	ldq_u $2,0($4)			/* quadword containing the dest byte */

	lda $18,-1($18)			/* n-- */
	extbl $3,$5,$1			/* $1 = src byte */
	insbl $1,$4,$1			/* move it to the dest byte lane */
	mskbl $2,$4,$2			/* clear that lane in the dest quad */

	bis $1,$2,$1			/* merge */
	lda $5,1($5)			/* advance src cursor */
	stq_u $1,0($4)
	lda $4,1($4)			/* advance dest cursor */

	and $4,7,$6			/* $6 == 0 once dest cursor is aligned */
	bne $6,$aligned_byte_loop_head_up

	/*
	 * Both cursors are now 8-byte aligned.  As in the backward path,
	 * $18 is biased by -8 while in the word loop.
	 */
$skip_aligned_byte_loop_head_up:
	lda $18,-8($18)
	blt $18,$skip_aligned_word_loop_up	/* fewer than 8 bytes left */

$aligned_word_loop_up:
	ldq $1,0($5)			/* load the quadword at the src cursor */
	nop
	lda $5,8($5)
	lda $18,-8($18)

	stq $1,0($4)			/* store it at the dest cursor */
	nop
	lda $4,8($4)
	bge $18,$aligned_word_loop_up

$skip_aligned_word_loop_up:
	lda $18,8($18)			/* undo the bias: $18 = bytes left */
	bgt $18,$byte_loop_tail_up	/* 1..7 trailing bytes remain */
	unop
	ret $31,($26),1

	.align 4
$misaligned_up:
	nop
	fnop
	unop
	beq $18,$egress			/* n == 0: nothing to do */

	/* Copy the remaining bytes forward, one per iteration. */
$byte_loop_tail_up:
	ldq_u $3,0($5)			/* quadword containing the src byte */
	ldq_u $2,0($4)			/* quadword containing the dest byte */
	lda $18,-1($18)			/* n-- */
	extbl $3,$5,$1			/* $1 = src byte */

	insbl $1,$4,$1			/* move it to the dest byte lane */
	mskbl $2,$4,$2			/* clear that lane in the dest quad */
	bis $1,$2,$1			/* merge */
	stq_u $1,0($4)

	lda $5,1($5)			/* advance both cursors */
	lda $4,1($4)
	nop
	bgt $18,$byte_loop_tail_up

	/* Common exit; $0 still holds the original dest. */
$egress:
	ret $31,($26),1
	nop
	nop
	nop

	.end memmove
	EXPORT_SYMBOL(memmove)