/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 *  Copyright (C) 1995-2001 Russell King
 */
#include <asm/assembler.h>

/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len)
 *  r0 = src, r1 = dst, r2 = len
 *  Returns : r0 = checksum
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 */

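/*
 * Note: the load1b/load2b/load1l/load2l/load4l, save_regs/load_regs and
 * FN_ENTRY/FN_EXIT macros are not defined here; they are expected to be
 * provided by the file that includes this one.  The running checksum
 * carry is kept live in the C flag between the adcs instructions below,
 * which is why only carry-preserving instructions such as 'tst' and
 * 'teq' are used for the intervening tests.
 */
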
src	.req	r0
dst	.req	r1
len	.req	r2
sum	.req	r3

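		/* Zero length: nothing to copy; return the checksum accumulated so far. */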
.Lzero:		mov	r0, sum
		load_regs

		/*
		 * Align an unaligned destination pointer.  We know that
		 * we have >= 8 bytes here, so we don't need to check
		 * the length.  Note that the source pointer hasn't been
		 * aligned yet.
		 */
.Ldst_unaligned:
		tst	dst, #1
		beq	.Ldst_16bit

		load1b	ip
		sub	len, len, #1
		adcs	sum, sum, ip, put_byte_1	@ update checksum
		strb	ip, [dst], #1
		tst	dst, #2
		reteq	lr			@ dst is now 32bit aligned

.Ldst_16bit:	load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8, put_byte_0
		strb	r8, [dst], #1
		adcs	sum, sum, ip, put_byte_1
		strb	ip, [dst], #1
		ret	lr			@ dst is now 32bit aligned

		/*
		 * Handle 0 to 7 bytes, with any alignment of source and
		 * destination pointers.  Note that when we get here, C = 0
		 */
.Lless8:	teq	len, #0			@ check for zero count
		beq	.Lzero

		/* we must have at least one byte. */
		tst	dst, #1			@ dst 16-bit aligned
		beq	.Lless8_aligned

		/* Align dst */
		load1b	ip
		sub	len, len, #1
		adcs	sum, sum, ip, put_byte_1	@ update checksum
		strb	ip, [dst], #1
		tst	len, #6
		beq	.Lless8_byteonly

1:		load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8, put_byte_0
		strb	r8, [dst], #1
		adcs	sum, sum, ip, put_byte_1
		strb	ip, [dst], #1
.Lless8_aligned:
		tst	len, #6
		bne	1b
.Lless8_byteonly:
		tst	len, #1
		beq	.Ldone
		load1b	r8
		adcs	sum, sum, r8, put_byte_0	@ update checksum
		strb	r8, [dst], #1
		b	.Ldone

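		/*
		 * Entry point.  sum is seeded with ~0 so that a successful
		 * copy never produces a zero result; the from-user copy
		 * wrapper relies on a zero return to indicate a fault.
		 */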
FN_ENTRY
		save_regs
		mov	sum, #-1

		cmp	len, #8			@ Ensure that we have at least
		blo	.Lless8			@ 8 bytes to copy.

		adds	sum, sum, #0		@ C = 0
		tst	dst, #3			@ Test destination alignment
		blne	.Ldst_unaligned		@ align destination, return here

		/*
		 * Ok, the dst pointer is now 32bit aligned, and we know
		 * that we must have more than 4 bytes to copy.  Note
		 * that C contains the carry from the dst alignment above.
		 */

		tst	src, #3			@ Test source alignment
		bne	.Lsrc_not_aligned

		/* Routine for src & dst aligned */

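		/*
		 * Copy 16 bytes per iteration while possible, then mop up
		 * any remaining 12/8/4-byte chunk, and finally the last
		 * 0-3 bytes, accumulating the checksum with adcs as we go.
		 */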
		bics	ip, len, #15
		beq	2f

1:		load4l	r4, r5, r6, r7
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b

2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r4, r5
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		tst	ip, #4
		beq	4f

3:		load1l	r4
		str	r4, [dst], #4
		adcs	sum, sum, r4

4:		ands	len, len, #3
		beq	.Ldone
		load1l	r4
		tst	len, #2
		mov	r5, r4, get_byte_0
		beq	.Lexit
		adcs	sum, sum, r4, lspush #16
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_2
.Lexit:		tst	len, #1
		strbne	r5, [dst], #1
		andne	r5, r5, #255
		adcsne	sum, sum, r5, put_byte_0

		/*
		 * If the dst pointer was not 16-bit aligned, we
		 * need to rotate the checksum here to get around
		 * the inefficient byte manipulations in the
		 * architecture independent code.
		 */
.Ldone:		adc	r0, sum, #0
		ldr	sum, [sp, #0]		@ dst
		tst	sum, #1
		movne	r0, r0, ror #8
		load_regs

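		/*
		 * Source is not 32-bit aligned: round src down to a word
		 * boundary and reassemble aligned words by shifting pairs
		 * of consecutive loads together (lspull/lspush select the
		 * appropriate shift direction for the configured
		 * endianness).  Separate unrolled copies handle source
		 * offsets of 1, 2 and 3 bytes within the word.
		 */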
.Lsrc_not_aligned:
		adc	sum, sum, #0		@ include C from dst alignment
		and	ip, src, #3
		bic	src, src, #3
		load1l	r5
		cmp	ip, #2
		beq	.Lsrc2_aligned
		bhi	.Lsrc3_aligned
		mov	r4, r5, lspull #8		@ C = 0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lspush #24
		mov	r5, r5, lspull #8
		orr	r5, r5, r6, lspush #24
		mov	r6, r6, lspull #8
		orr	r6, r6, r7, lspush #24
		mov	r7, r7, lspull #8
		orr	r7, r7, r8, lspush #24
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, lspull #8
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, lspush #24
		mov	r5, r5, lspull #8
		orr	r5, r5, r6, lspush #24
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, lspull #8
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, lspush #24
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, lspull #8
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		adcs	sum, sum, r4, lspush #16
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_2
		b	.Lexit

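		/*
		 * Source was offset by 2 bytes: each destination word is
		 * assembled from halves of two consecutive source words.
		 */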
.Lsrc2_aligned:	mov	r4, r5, lspull #16
		adds	sum, sum, #0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lspush #16
		mov	r5, r5, lspull #16
		orr	r5, r5, r6, lspush #16
		mov	r6, r6, lspull #16
		orr	r6, r6, r7, lspush #16
		mov	r7, r7, lspull #16
		orr	r7, r7, r8, lspush #16
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, lspull #16
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, lspush #16
		mov	r5, r5, lspull #16
		orr	r5, r5, r6, lspush #16
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, lspull #16
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, lspush #16
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, lspull #16
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		adcs	sum, sum, r4
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		tst	len, #1
		beq	.Ldone
		load1b	r5
		b	.Lexit

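		/*
		 * Source was offset by 3 bytes: each destination word is
		 * assembled from one byte of one source word and three
		 * bytes of the next.
		 */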
.Lsrc3_aligned:	mov	r4, r5, lspull #24
		adds	sum, sum, #0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lspush #8
		mov	r5, r5, lspull #24
		orr	r5, r5, r6, lspush #8
		mov	r6, r6, lspull #24
		orr	r6, r6, r7, lspush #8
		mov	r7, r7, lspull #24
		orr	r7, r7, r8, lspush #8
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, lspull #24
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, lspush #8
		mov	r5, r5, lspull #24
		orr	r5, r5, r6, lspush #8
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, lspull #24
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, lspush #8
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, lspull #24
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		strb	r5, [dst], #1
		adcs	sum, sum, r4
		load1l	r4
		mov	r5, r4, get_byte_0
		strb	r5, [dst], #1
		adcs	sum, sum, r4, lspush #24
		mov	r5, r4, get_byte_1
		b	.Lexit
FN_EXIT