1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * arch/alpha/lib/memmove.S
4  *
5  * Barely optimized memmove routine for Alpha EV5.
6  *
7  * This is hand-massaged output from the original memcpy.c.  We defer to
8  * memcpy whenever possible; the backwards copy loops are not unrolled.
9  */
10 #include <linux/export.h>
11 	.set noat
12 	.set noreorder
13 	.text
14 
/*
 * memmove: copy $18 bytes from the buffer at $17 to the buffer at $16,
 * giving the correct result even when the two buffers overlap.
 *
 * Register usage, as established by the code below:
 *   $16 = dest, $17 = src, $18 = n	(inputs; $18 doubles as the
 *					 remaining-byte counter)
 *   $0  = copy of the original dest, written once and never changed
 *   $4  = working dest pointer, $5 = working src pointer
 *   $1, $2, $3, $6 = scratch
 *   $26 = return address used by every "ret"
 *
 * Strategy:
 *   - If the buffers do not overlap, branch straight to memcpy.
 *   - Otherwise copy upwards (ascending addresses) when dest <= src and
 *     downwards (starting from the ends of the buffers) when dest > src.
 *   - In either direction, when dest and src have the same offset within
 *     a quadword ("co-aligned"): copy single bytes until the dest pointer
 *     is 8-byte aligned, then whole quadwords, then the leftover bytes.
 *     When they are not co-aligned the whole copy is done byte by byte.
 *
 * A single byte is moved by a read-modify-write of the quadword that
 * contains it: ldq_u both quadwords, extbl the source byte, insbl it
 * into the destination byte lane, mskbl that lane out of the destination
 * quadword, bis the two together and stq_u the result.
 *
 * The unop/nop/fnop instructions do no work.
 * NOTE(review): they look like filler that keeps the hand-scheduled
 * four-instruction groups (separated by blank lines) intact for EV5;
 * confirm before removing or reordering anything.
 */
15 	.align 4
16 	.globl memmove
17 	.ent memmove
18 memmove:
19 	ldgp $29, 0($27)		/* set up the global pointer from $27 */
20 	unop
21 	nop
22 	.prologue 1
23 
24 	addq $16,$18,$4			/* $4 = dest + n (one past end of dest) */
25 	addq $17,$18,$5			/* $5 = src + n (one past end of src) */
26 	cmpule $4,$17,$1		/*  dest + n <= src  */
27 	cmpule $5,$16,$2		/*  dest >= src + n  */
28 
29 	bis $1,$2,$1			/* $1 != 0 if either test held: no overlap */
30 	mov $16,$0			/* $0 = original dest */
31 	xor $16,$17,$2			/* $2 = dest ^ src, reduced to its low bits below */
	/*
	 * No overlap: let memcpy do the work.  This is a plain branch, so
	 * $26 still holds our caller's return address.  The !samegp
	 * annotation marks memcpy as sharing this function's GP.
	 */
32 	bne $1,memcpy			!samegp
33 
34 	and $2,7,$2			/* Test for src/dest co-alignment.  */
35 	and $16,7,$1			/* $1 = dest & 7 (consumed on the upward path) */
36 	cmpule $16,$17,$3		/* $3 = (dest <= src), unsigned */
37 	bne $3,$memmove_up		/* dest <= src */
38 
	/*
	 * Downward copy (dest > src).  $4/$5 already point one past the end
	 * of dest/src and are walked towards lower addresses.
	 */
39 	and $4,7,$1			/* $1 = (dest + n) & 7 */
40 	bne $2,$misaligned_dn		/* not co-aligned: bytes only */
41 	unop
42 	beq $1,$skip_aligned_byte_loop_head_dn	/* end already 8-byte aligned */
43 
	/* Copy single bytes downwards until the dest pointer is 8-byte aligned. */
44 $aligned_byte_loop_head_dn:
45 	lda $4,-1($4)			/* dest pointer -> byte to write */
46 	lda $5,-1($5)			/* src pointer -> byte to read */
47 	unop
48 	ble $18,$egress			/* count exhausted: done */
49 
50 	ldq_u $3,0($5)			/* quadword containing the src byte */
51 	ldq_u $2,0($4)			/* quadword containing the dest byte */
52 	lda $18,-1($18)			/* n-- */
53 	extbl $3,$5,$1			/* $1 = src byte, selected by low bits of $5 */
54 
55 	insbl $1,$4,$1			/* move it into the dest byte lane */
56 	mskbl $2,$4,$2			/* clear that lane in the dest quadword */
57 	bis $1,$2,$1			/* merge new byte with the other seven */
58 	and $4,7,$6			/* $6 = dest pointer & 7 */
59 
60 	stq_u $1,0($4)			/* write the merged quadword back */
61 	bne $6,$aligned_byte_loop_head_dn	/* loop until dest pointer is aligned */
62 
	/*
	 * Both pointers are now 8-byte aligned.  The count is biased by -8
	 * so that its sign tells whether a full quadword is still left.
	 */
63 $skip_aligned_byte_loop_head_dn:
64 	lda $18,-8($18)			/* n -= 8 */
65 	blt $18,$skip_aligned_word_loop_dn	/* fewer than 8 bytes remain */
66 
	/* Copy one quadword per iteration, moving downwards. */
67 $aligned_word_loop_dn:
68 	ldq $1,-8($5)			/* load the quadword just below src pointer */
69 	nop
70 	lda $5,-8($5)			/* src pointer -= 8 */
71 	lda $18,-8($18)			/* n -= 8 */
72 
73 	stq $1,-8($4)			/* store it just below dest pointer */
74 	nop
75 	lda $4,-8($4)			/* dest pointer -= 8 */
76 	bge $18,$aligned_word_loop_dn	/* another full quadword remains */
77 
78 $skip_aligned_word_loop_dn:
79 	lda $18,8($18)			/* undo the bias: $18 = bytes still to copy */
80 	bgt $18,$byte_loop_tail_dn	/* copy the leftover bytes */
81 	unop
82 	ret $31,($26),1			/* nothing left: return */
83 
84 	.align 4
	/* Not co-aligned, downward: the entire copy uses the byte loop. */
85 $misaligned_dn:
86 	nop
87 	fnop
88 	unop
89 	beq $18,$egress			/* n == 0: nothing to do */
90 
	/*
	 * Downward byte loop.  Entered with $18 > 0 from the quadword path;
	 * $4/$5 point one past the next byte, hence the -1 displacements.
	 */
91 $byte_loop_tail_dn:
92 	ldq_u $3,-1($5)			/* quadword containing the src byte */
93 	ldq_u $2,-1($4)			/* quadword containing the dest byte */
94 	lda $5,-1($5)			/* src pointer -> that byte */
95 	lda $4,-1($4)			/* dest pointer -> that byte */
96 
97 	lda $18,-1($18)			/* n-- */
98 	extbl $3,$5,$1			/* $1 = src byte */
99 	insbl $1,$4,$1			/* move it into the dest byte lane */
100 	mskbl $2,$4,$2			/* clear that lane in the dest quadword */
101 
102 	bis $1,$2,$1			/* merge */
103 	stq_u $1,0($4)			/* write the merged quadword back */
104 	bgt $18,$byte_loop_tail_dn	/* more bytes to copy */
105 	br $egress
106 
	/*
	 * Upward copy (dest <= src).  On entry $1 = dest & 7 and $2 is
	 * non-zero iff dest and src are not co-aligned.
	 */
107 $memmove_up:
108 	mov $16,$4			/* $4 = dest */
109 	mov $17,$5			/* $5 = src */
110 	bne $2,$misaligned_up		/* not co-aligned: bytes only */
111 	beq $1,$skip_aligned_byte_loop_head_up	/* dest already 8-byte aligned */
112 
	/* Copy single bytes upwards until the dest pointer is 8-byte aligned. */
113 $aligned_byte_loop_head_up:
114 	unop
115 	ble $18,$egress			/* count exhausted: done */
116 	ldq_u $3,0($5)			/* quadword containing the src byte */
117 	ldq_u $2,0($4)			/* quadword containing the dest byte */
118 
119 	lda $18,-1($18)			/* n-- */
120 	extbl $3,$5,$1			/* $1 = src byte */
121 	insbl $1,$4,$1			/* move it into the dest byte lane */
122 	mskbl $2,$4,$2			/* clear that lane in the dest quadword */
123 
124 	bis $1,$2,$1			/* merge */
125 	lda $5,1($5)			/* src pointer++ */
126 	stq_u $1,0($4)			/* write back (uses the old dest pointer) */
127 	lda $4,1($4)			/* dest pointer++ */
128 
129 	and $4,7,$6			/* $6 = dest pointer & 7 */
130 	bne $6,$aligned_byte_loop_head_up	/* loop until dest pointer is aligned */
131 
	/* Both pointers are now 8-byte aligned; bias the count by -8 as above. */
132 $skip_aligned_byte_loop_head_up:
133 	lda $18,-8($18)			/* n -= 8 */
134 	blt $18,$skip_aligned_word_loop_up	/* fewer than 8 bytes remain */
135 
	/* Copy one quadword per iteration, moving upwards. */
136 $aligned_word_loop_up:
137 	ldq $1,0($5)			/* load quadword at src pointer */
138 	nop
139 	lda $5,8($5)			/* src pointer += 8 */
140 	lda $18,-8($18)			/* n -= 8 */
141 
142 	stq $1,0($4)			/* store quadword at dest pointer */
143 	nop
144 	lda $4,8($4)			/* dest pointer += 8 */
145 	bge $18,$aligned_word_loop_up	/* another full quadword remains */
146 
147 $skip_aligned_word_loop_up:
148 	lda $18,8($18)			/* undo the bias: $18 = bytes still to copy */
149 	bgt $18,$byte_loop_tail_up	/* copy the leftover bytes */
150 	unop
151 	ret $31,($26),1			/* nothing left: return */
152 
153 	.align 4
	/* Not co-aligned, upward: the entire copy uses the byte loop. */
154 $misaligned_up:
155 	nop
156 	fnop
157 	unop
158 	beq $18,$egress			/* n == 0: nothing to do */
159 
	/* Upward byte loop; falls through into $egress when the count hits 0. */
160 $byte_loop_tail_up:
161 	ldq_u $3,0($5)			/* quadword containing the src byte */
162 	ldq_u $2,0($4)			/* quadword containing the dest byte */
163 	lda $18,-1($18)			/* n-- */
164 	extbl $3,$5,$1			/* $1 = src byte */
165 
166 	insbl $1,$4,$1			/* move it into the dest byte lane */
167 	mskbl $2,$4,$2			/* clear that lane in the dest quadword */
168 	bis $1,$2,$1			/* merge */
169 	stq_u $1,0($4)			/* write the merged quadword back */
170 
171 	lda $5,1($5)			/* src pointer++ */
172 	lda $4,1($4)			/* dest pointer++ */
173 	nop
174 	bgt $18,$byte_loop_tail_up	/* more bytes to copy */
175 
	/* Common exit; $0 still holds the original dest. */
176 $egress:
177 	ret $31,($26),1
	/* NOTE(review): the nops below are never executed; presumably padding. */
178 	nop
179 	nop
180 	nop
181 
182 	.end memmove
183 	EXPORT_SYMBOL(memmove)
184