/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Quick'n'dirty IP checksum ...
 *
 * Copyright (C) 1998, 1999 Ralf Baechle
 * Copyright (C) 1999 Silicon Graphics, Inc.
 * Copyright (C) 2007  Maciej W. Rozycki
 * Copyright (C) 2014 Imagination Technologies Ltd.
 */
#include <linux/errno.h>
#include <linux/export.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/regdef.h>

#ifdef CONFIG_64BIT
/*
 * As we share this code with the mips32 tree (which uses the o32 ABI
 * register definitions), we need to redefine the registers from the
 * n64 ABI naming to the o32 ABI naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0	$8
#define t1	$9
#define t2	$10
#define t3	$11
#define t4	$12
#define t5	$13
#define t6	$14
#define t7	$15
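
/*
 * Under the n64 ABI $8-$11 are the argument registers a4-a7, while
 * $12-$15 are t0-t3.  The routines in this file take at most three
 * register arguments (a0-a2), so $8-$11 are free to serve as the
 * o32-style t0-t3 above, with $12-$15 renamed to t4-t7.
 */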

#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

#define LOAD   ld
#define LOAD32 lwu
#define ADD    daddu
#define NBYTES 8

#else

#define LOAD   lw
#define LOAD32 lw
#define ADD    addu
#define NBYTES 4

#endif /* USE_DOUBLE */

#define UNIT(unit)  ((unit)*NBYTES)

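/*
 * ADDC adds "reg" into the running checksum with end-around carry: if the
 * addition overflows, sltu sets v1 to 1 and the carry is added back in, as
 * the ones'-complement Internet checksum requires.  In C terms, roughly
 * (a sketch, not taken from the kernel sources):
 *
 *	sum += reg;
 *	if (sum < reg)		// unsigned overflow, i.e. a carry out
 *		sum += 1;
 *
 * ADDC32 is the same operation using 32-bit adds only; it is used to fold
 * in the 32-bit partial checksum passed in by the caller.
 */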
#define ADDC(sum,reg)						\
	.set	push;						\
	.set	noat;						\
	ADD	sum, reg;					\
	sltu	v1, sum, reg;					\
	ADD	sum, v1;					\
	.set	pop

#define ADDC32(sum,reg)						\
	.set	push;						\
	.set	noat;						\
	addu	sum, reg;					\
	sltu	v1, sum, reg;					\
	addu	sum, v1;					\
	.set	pop

#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)	\
	LOAD	_t0, (offset + UNIT(0))(src);			\
	LOAD	_t1, (offset + UNIT(1))(src);			\
	LOAD	_t2, (offset + UNIT(2))(src);			\
	LOAD	_t3, (offset + UNIT(3))(src);			\
	ADDC(_t0, _t1);						\
	ADDC(_t2, _t3);						\
	ADDC(sum, _t0);						\
	ADDC(sum, _t2)

#ifdef USE_DOUBLE
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
	CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)
#else
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
	CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3);	\
	CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3)
#endif
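
/*
 * CSUM_BIGCHUNK always consumes 32 bytes at "offset" from "src": four
 * doublewords on 64-bit kernels, or two back-to-back CSUM_BIGCHUNK1
 * groups of four words each on 32-bit kernels.  The loaded values are
 * added pairwise and then into sum, keeping the carry-chain dependencies
 * short.
 */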

/*
 * a0: source address
 * a1: length of the area to checksum
 * a2: partial checksum
 */
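
/*
 * C prototype:
 *
 *	__wsum csum_partial(const void *buff, int len, __wsum sum);
 *
 * The result is a 32-bit partial checksum returned in v0 ("sum" below);
 * callers fold it to 16 bits with csum_fold() when the final checksum is
 * needed.
 */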

#define src a0
#define sum v0

	.text
	.set	noreorder
	.align	5
LEAF(csum_partial)
EXPORT_SYMBOL(csum_partial)
	move	sum, zero
	move	t7, zero

	sltiu	t8, a1, 0x8
	bnez	t8, .Lsmall_csumcpy		/* < 8 bytes to copy */
	 move	t2, a1

	andi	t7, src, 0x1			/* odd buffer? */

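/*
 * Alignment cascade: consume 1, 2, 4, 8 and then 16 bytes as needed to
 * bring src up to a 32-byte-friendly alignment, run the unrolled
 * 128/64/32-byte loops, then mop up the remaining words and bytes in
 * .Ldo_end_words/.Lsmall_csumcpy.  t7 records whether the buffer started
 * on an odd address so the result can be byte-swapped at the end.
 */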
.Lhword_align:
	beqz	t7, .Lword_align
	 andi	t8, src, 0x2

	lbu	t0, (src)
	LONG_SUBU	a1, a1, 0x1
#ifdef __MIPSEL__
	sll	t0, t0, 8
#endif
	ADDC(sum, t0)
	PTR_ADDU	src, src, 0x1
	andi	t8, src, 0x2

.Lword_align:
	beqz	t8, .Ldword_align
	 sltiu	t8, a1, 56

	lhu	t0, (src)
	LONG_SUBU	a1, a1, 0x2
	ADDC(sum, t0)
	sltiu	t8, a1, 56
	PTR_ADDU	src, src, 0x2

.Ldword_align:
	bnez	t8, .Ldo_end_words
	 move	t8, a1

	andi	t8, src, 0x4
	beqz	t8, .Lqword_align
	 andi	t8, src, 0x8

	LOAD32	t0, 0x00(src)
	LONG_SUBU	a1, a1, 0x4
	ADDC(sum, t0)
	PTR_ADDU	src, src, 0x4
	andi	t8, src, 0x8

.Lqword_align:
	beqz	t8, .Loword_align
	 andi	t8, src, 0x10

#ifdef USE_DOUBLE
	ld	t0, 0x00(src)
	LONG_SUBU	a1, a1, 0x8
	ADDC(sum, t0)
#else
	lw	t0, 0x00(src)
	lw	t1, 0x04(src)
	LONG_SUBU	a1, a1, 0x8
	ADDC(sum, t0)
	ADDC(sum, t1)
#endif
	PTR_ADDU	src, src, 0x8
	andi	t8, src, 0x10

.Loword_align:
	beqz	t8, .Lbegin_movement
	 LONG_SRL	t8, a1, 0x7

#ifdef USE_DOUBLE
	ld	t0, 0x00(src)
	ld	t1, 0x08(src)
	ADDC(sum, t0)
	ADDC(sum, t1)
#else
	CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4)
#endif
	LONG_SUBU	a1, a1, 0x10
	PTR_ADDU	src, src, 0x10
	LONG_SRL	t8, a1, 0x7

.Lbegin_movement:
	beqz	t8, 1f
	 andi	t2, a1, 0x40

.Lmove_128bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
	LONG_SUBU	t8, t8, 0x01
	.set	reorder				/* DADDI_WAR */
	PTR_ADDU	src, src, 0x80
	bnez	t8, .Lmove_128bytes
	.set	noreorder

1:
	beqz	t2, 1f
	 andi	t2, a1, 0x20

.Lmove_64bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
	PTR_ADDU	src, src, 0x40

1:
	beqz	t2, .Ldo_end_words
	 andi	t8, a1, 0x1c

.Lmove_32bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	andi	t8, a1, 0x1c
	PTR_ADDU	src, src, 0x20

.Ldo_end_words:
	beqz	t8, .Lsmall_csumcpy
	 andi	t2, a1, 0x3
	LONG_SRL	t8, t8, 0x2

.Lend_words:
	LOAD32	t0, (src)
	LONG_SUBU	t8, t8, 0x1
	ADDC(sum, t0)
	.set	reorder				/* DADDI_WAR */
	PTR_ADDU	src, src, 0x4
	bnez	t8, .Lend_words
	.set	noreorder

/* unknown src alignment and < 8 bytes to go  */
.Lsmall_csumcpy:
	move	a1, t2

	andi	t0, a1, 4
	beqz	t0, 1f
	 andi	t0, a1, 2

	/* Still a full word to go  */
	ulw	t1, (src)
	PTR_ADDIU	src, 4
#ifdef USE_DOUBLE
	dsll	t1, t1, 32			/* clear lower 32bit */
#endif
	ADDC(sum, t1)

1:	move	t1, zero
	beqz	t0, 1f
	 andi	t0, a1, 1

	/* Still a halfword to go  */
	ulhu	t1, (src)
	PTR_ADDIU	src, 2

1:	beqz	t0, 1f
	 sll	t1, t1, 16

	lbu	t2, (src)
	 nop

#ifdef __MIPSEB__
	sll	t2, t2, 8
#endif
	or	t1, t2

1:	ADDC(sum, t1)

	/* fold checksum */
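	/*
	 * On 64-bit the accumulator may use all 64 bits: add the upper and
	 * lower 32-bit halves together and fold the carry out of that add
	 * back in (end-around carry), leaving a 32-bit partial checksum.
	 */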
#ifdef USE_DOUBLE
	dsll32	v1, sum, 0
	daddu	sum, v1
	sltu	v1, sum, v1
	dsra32	sum, sum, 0
	addu	sum, v1
#endif

	/* odd buffer alignment? */
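	/*
	 * If the buffer started on an odd address, every byte was summed one
	 * position off, so the two bytes of each 16-bit half of sum must be
	 * swapped.  On R2 and later cores this is wsbh (swap bytes within
	 * halfwords), applied conditionally via movn when t7 is non-zero;
	 * the fallback builds the same swap from the 0x00ff00ff mask and a
	 * pair of shifts.
	 */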
#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
    defined(CONFIG_CPU_LOONGSON64)
	.set	push
	.set	arch=mips32r2
	wsbh	v1, sum
	movn	sum, v1, t7
	.set	pop
#else
	beqz	t7, 1f			/* odd buffer alignment? */
	 lui	v1, 0x00ff
	addu	v1, 0x00ff
	and	t0, sum, v1
	sll	t0, t0, 8
	srl	sum, sum, 8
	and	sum, sum, v1
	or	sum, sum, t0
1:
#endif
	.set	reorder
	/* Add the passed partial csum.	 */
	ADDC32(sum, a2)
	jr	ra
	.set	noreorder
	END(csum_partial)


/*
 * checksum and copy routines based on memcpy.S
 *
 *	__csum_partial_copy_nocheck(src, dst, len)
 *	__csum_partial_copy_to_user(src, dst, len)
 *	__csum_partial_copy_from_user(src, dst, len)
 *
 * See "Spec" in memcpy.S for details.	Unlike __copy_user, all
 * functions in this file use the standard calling convention.
 */

#define src a0
#define dst a1
#define len a2
#define sum v0
#define odd t8

/*
 * All exception handlers simply return 0.
 */

/* Instruction type */
#define LD_INSN 1
#define ST_INSN 2
#define LEGACY_MODE 1
#define EVA_MODE    2
#define USEROP   1
#define KERNELOP 2

/*
 * Wrapper to add an entry in the exception table
 * in case the insn causes a memory exception.
 * Arguments:
 * insn    : Load/store instruction
 * type    : Instruction type
 * reg     : Register
 * addr    : Address
 * Faulting accesses are directed to .L_exc via the exception table.
 */
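/*
 * In EVA mode the user-side accesses use the e-variant instructions
 * (generated via __BUILD_EVA_INSN), which reach the user address space
 * while the CPU runs in kernel mode; only those accesses are expected to
 * fault, so only they get __ex_table entries.  Kernel-side accesses in
 * the same copy use the ordinary instructions.
 */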
#define EXC(insn, type, reg, addr)		\
	.if \mode == LEGACY_MODE;		\
9:		insn reg, addr;			\
		.section __ex_table,"a";	\
		PTR_WD	9b, .L_exc;		\
		.previous;			\
	/* This is enabled in EVA mode */	\
	.else;					\
		/* If loading from user or storing to user */	\
		.if ((\from == USEROP) && (type == LD_INSN)) || \
		    ((\to == USEROP) && (type == ST_INSN));	\
9:			__BUILD_EVA_INSN(insn##e, reg, addr);	\
			.section __ex_table,"a";		\
			PTR_WD	9b, .L_exc;			\
			.previous;				\
		.else;						\
			/* EVA without exception */		\
			insn reg, addr;				\
		.endif;						\
	.endif

#undef LOAD

#ifdef USE_DOUBLE

#define LOADK	ld /* No exception */
#define LOAD(reg, addr)		EXC(ld, LD_INSN, reg, addr)
#define LOADBU(reg, addr)	EXC(lbu, LD_INSN, reg, addr)
#define LOADL(reg, addr)	EXC(ldl, LD_INSN, reg, addr)
#define LOADR(reg, addr)	EXC(ldr, LD_INSN, reg, addr)
#define STOREB(reg, addr)	EXC(sb, ST_INSN, reg, addr)
#define STOREL(reg, addr)	EXC(sdl, ST_INSN, reg, addr)
#define STORER(reg, addr)	EXC(sdr, ST_INSN, reg, addr)
#define STORE(reg, addr)	EXC(sd, ST_INSN, reg, addr)
#define ADD    daddu
#define SUB    dsubu
#define SRL    dsrl
#define SLL    dsll
#define SLLV   dsllv
#define SRLV   dsrlv
#define NBYTES 8
#define LOG_NBYTES 3

#else

#define LOADK	lw /* No exception */
#define LOAD(reg, addr)		EXC(lw, LD_INSN, reg, addr)
#define LOADBU(reg, addr)	EXC(lbu, LD_INSN, reg, addr)
#define LOADL(reg, addr)	EXC(lwl, LD_INSN, reg, addr)
#define LOADR(reg, addr)	EXC(lwr, LD_INSN, reg, addr)
#define STOREB(reg, addr)	EXC(sb, ST_INSN, reg, addr)
#define STOREL(reg, addr)	EXC(swl, ST_INSN, reg, addr)
#define STORER(reg, addr)	EXC(swr, ST_INSN, reg, addr)
#define STORE(reg, addr)	EXC(sw, ST_INSN, reg, addr)
#define ADD    addu
#define SUB    subu
#define SRL    srl
#define SLL    sll
#define SLLV   sllv
#define SRLV   srlv
#define NBYTES 4
#define LOG_NBYTES 2

#endif /* USE_DOUBLE */

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST	LOADL
#define STFIRST STORER
#define STREST	STOREL
#define SHIFT_DISCARD SLLV
#define SHIFT_DISCARD_REVERT SRLV
#else
#define LDFIRST LOADL
#define LDREST	LOADR
#define STFIRST STOREL
#define STREST	STORER
#define SHIFT_DISCARD SRLV
#define SHIFT_DISCARD_REVERT SLLV
#endif

#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)

#define ADDRMASK (NBYTES-1)

#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
	.set	noat
#else
	.set	at=v1
#endif

	.macro __BUILD_CSUM_PARTIAL_COPY_USER mode, from, to

	li	sum, -1
	move	odd, zero
	/*
	 * Note: dst & src may be unaligned, len may be 0
	 * Temps
	 */
	/*
	 * The "issue breaks" below are very approximate.
	 * Issue delays for dcache fills will perturb the schedule, as will
	 * load queue full replay traps, etc.
	 *
	 * If len < NBYTES use byte operations.
	 */
	sltu	t2, len, NBYTES
	and	t1, dst, ADDRMASK
	bnez	t2, .Lcopy_bytes_checklen\@
	 and	t0, src, ADDRMASK
	andi	odd, dst, 0x1			/* odd buffer? */
	bnez	t1, .Ldst_unaligned\@
	 nop
	bnez	t0, .Lsrc_unaligned_dst_aligned\@
	/*
	 * use delay slot for fall-through
	 * src and dst are aligned; need to compute rem
	 */
.Lboth_aligned\@:
	 SRL	t0, len, LOG_NBYTES+3	 # +3 for 8 units/iter
	beqz	t0, .Lcleanup_both_aligned\@ # len < 8*NBYTES
	 nop
	SUB	len, 8*NBYTES		# subtract here for bgez loop
	.align	4
1:
	LOAD(t0, UNIT(0)(src))
	LOAD(t1, UNIT(1)(src))
	LOAD(t2, UNIT(2)(src))
	LOAD(t3, UNIT(3)(src))
	LOAD(t4, UNIT(4)(src))
	LOAD(t5, UNIT(5)(src))
	LOAD(t6, UNIT(6)(src))
	LOAD(t7, UNIT(7)(src))
	SUB	len, len, 8*NBYTES
	ADD	src, src, 8*NBYTES
	STORE(t0, UNIT(0)(dst))
	ADDC(t0, t1)
	STORE(t1, UNIT(1)(dst))
	ADDC(sum, t0)
	STORE(t2, UNIT(2)(dst))
	ADDC(t2, t3)
	STORE(t3, UNIT(3)(dst))
	ADDC(sum, t2)
	STORE(t4, UNIT(4)(dst))
	ADDC(t4, t5)
	STORE(t5, UNIT(5)(dst))
	ADDC(sum, t4)
	STORE(t6, UNIT(6)(dst))
	ADDC(t6, t7)
	STORE(t7, UNIT(7)(dst))
	ADDC(sum, t6)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 8*NBYTES
	bgez	len, 1b
	.set	noreorder
	ADD	len, 8*NBYTES		# revert len (see above)

	/*
	 * len == the number of bytes left to copy < 8*NBYTES
	 */
.Lcleanup_both_aligned\@:
#define rem t7
	beqz	len, .Ldone\@
	 sltu	t0, len, 4*NBYTES
	bnez	t0, .Lless_than_4units\@
	 and	rem, len, (NBYTES-1)	# rem = len % NBYTES
	/*
	 * len >= 4*NBYTES
	 */
	LOAD(t0, UNIT(0)(src))
	LOAD(t1, UNIT(1)(src))
	LOAD(t2, UNIT(2)(src))
	LOAD(t3, UNIT(3)(src))
	SUB	len, len, 4*NBYTES
	ADD	src, src, 4*NBYTES
	STORE(t0, UNIT(0)(dst))
	ADDC(t0, t1)
	STORE(t1, UNIT(1)(dst))
	ADDC(sum, t0)
	STORE(t2, UNIT(2)(dst))
	ADDC(t2, t3)
	STORE(t3, UNIT(3)(dst))
	ADDC(sum, t2)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	beqz	len, .Ldone\@
	.set	noreorder
.Lless_than_4units\@:
	/*
	 * rem = len % NBYTES
	 */
	beq	rem, len, .Lcopy_bytes\@
	 nop
1:
	LOAD(t0, 0(src))
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst))
	ADDC(sum, t0)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	rem, len, 1b
	.set	noreorder

	/*
	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
	 * A loop would do only a byte at a time with possible branch
	 * mispredicts.	 Can't do an explicit LOAD dst,mask,or,STORE
	 * because we can't assume read access to dst.	Instead, use
	 * STREST dst, which doesn't require read access to dst.
	 *
	 * This code should perform better than a simple loop on modern,
	 * wide-issue mips processors because the code has fewer branches and
	 * more instruction-level parallelism.
	 */
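	/*
	 * Mechanically: load one full NBYTES word from src, SHIFT_DISCARD
	 * the bytes beyond len, store what is left with STREST so dst is
	 * written without being read, then revert the shift so the kept
	 * bytes sit at their original byte positions when added into sum.
	 */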
#define bits t2
	beqz	len, .Ldone\@
	 ADD	t1, dst, len	# t1 is just past last byte of dst
	li	bits, 8*NBYTES
	SLL	rem, len, 3	# rem = number of bits to keep
	LOAD(t0, 0(src))
	SUB	bits, bits, rem # bits = number of bits to discard
	SHIFT_DISCARD t0, t0, bits
	STREST(t0, -1(t1))
	SHIFT_DISCARD_REVERT t0, t0, bits
	.set reorder
	ADDC(sum, t0)
	b	.Ldone\@
	.set noreorder
.Ldst_unaligned\@:
	/*
	 * dst is unaligned
	 * t0 = src & ADDRMASK
	 * t1 = dst & ADDRMASK; t1 > 0
	 * len >= NBYTES
	 *
	 * Copy enough bytes to align dst
	 * Set match = (src and dst have same alignment)
	 */
#define match rem
	LDFIRST(t3, FIRST(0)(src))
	ADD	t2, zero, NBYTES
	LDREST(t3, REST(0)(src))
	SUB	t2, t2, t1	# t2 = number of bytes copied
	xor	match, t0, t1
	STFIRST(t3, FIRST(0)(dst))
	SLL	t4, t1, 3		# t4 = number of bits to discard
	SHIFT_DISCARD t3, t3, t4
	/* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
	ADDC(sum, t3)
	beq	len, t2, .Ldone\@
	 SUB	len, len, t2
	ADD	dst, dst, t2
	beqz	match, .Lboth_aligned\@
	 ADD	src, src, t2

.Lsrc_unaligned_dst_aligned\@:
	SRL	t0, len, LOG_NBYTES+2	 # +2 for 4 units/iter
	beqz	t0, .Lcleanup_src_unaligned\@
	 and	rem, len, (4*NBYTES-1)	 # rem = len % 4*NBYTES
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
 * implementations can't issue them in the same cycle.
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
	LDFIRST(t0, FIRST(0)(src))
	LDFIRST(t1, FIRST(1)(src))
	SUB	len, len, 4*NBYTES
	LDREST(t0, REST(0)(src))
	LDREST(t1, REST(1)(src))
	LDFIRST(t2, FIRST(2)(src))
	LDFIRST(t3, FIRST(3)(src))
	LDREST(t2, REST(2)(src))
	LDREST(t3, REST(3)(src))
	ADD	src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
	nop				# improves slotting
#endif
	STORE(t0, UNIT(0)(dst))
	ADDC(t0, t1)
	STORE(t1, UNIT(1)(dst))
	ADDC(sum, t0)
	STORE(t2, UNIT(2)(dst))
	ADDC(t2, t3)
	STORE(t3, UNIT(3)(dst))
	ADDC(sum, t2)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcleanup_src_unaligned\@:
	beqz	len, .Ldone\@
	 and	rem, len, NBYTES-1  # rem = len % NBYTES
	beq	rem, len, .Lcopy_bytes\@
	 nop
1:
	LDFIRST(t0, FIRST(0)(src))
	LDREST(t0, REST(0)(src))
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst))
	ADDC(sum, t0)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcopy_bytes_checklen\@:
	beqz	len, .Ldone\@
	 nop
.Lcopy_bytes\@:
	/* 0 < len < NBYTES  */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define SHIFT_START 0
#define SHIFT_INC 8
#else
#define SHIFT_START 8*(NBYTES-1)
#define SHIFT_INC -8
#endif
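	/*
	 * Each COPY_BYTE below copies one byte and also shifts it into t2 at
	 * the position it would occupy in a whole aligned word: starting at
	 * bit 0 and moving up by 8 per byte on little-endian, or starting at
	 * the top byte and moving down on big-endian.  The assembled partial
	 * word is then added into sum in one go.
	 */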
	move	t2, zero	# partial word
	li	t3, SHIFT_START # shift
#define COPY_BYTE(N)			\
	LOADBU(t0, N(src));		\
	SUB	len, len, 1;		\
	STOREB(t0, N(dst));		\
	SLLV	t0, t0, t3;		\
	addu	t3, SHIFT_INC;		\
	beqz	len, .Lcopy_bytes_done\@; \
	 or	t2, t0

	COPY_BYTE(0)
	COPY_BYTE(1)
#ifdef USE_DOUBLE
	COPY_BYTE(2)
	COPY_BYTE(3)
	COPY_BYTE(4)
	COPY_BYTE(5)
#endif
	LOADBU(t0, NBYTES-2(src))
	SUB	len, len, 1
	STOREB(t0, NBYTES-2(dst))
	SLLV	t0, t0, t3
	or	t2, t0
.Lcopy_bytes_done\@:
	ADDC(sum, t2)
.Ldone\@:
	/* fold checksum */
	.set	push
	.set	noat
#ifdef USE_DOUBLE
	dsll32	v1, sum, 0
	daddu	sum, v1
	sltu	v1, sum, v1
	dsra32	sum, sum, 0
	addu	sum, v1
#endif

#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
    defined(CONFIG_CPU_LOONGSON64)
	.set	push
	.set	arch=mips32r2
	wsbh	v1, sum
	movn	sum, v1, odd
	.set	pop
#else
	beqz	odd, 1f			/* odd buffer alignment? */
	 lui	v1, 0x00ff
	addu	v1, 0x00ff
	and	t0, sum, v1
	sll	t0, t0, 8
	srl	sum, sum, 8
	and	sum, sum, v1
	or	sum, sum, t0
1:
#endif
	.set	pop
	.set reorder
	jr	ra
	.set noreorder
	.endm

	.set noreorder
.L_exc:
	jr	ra
	 li	v0, 0

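
/*
 * Without EVA, kernel and user addresses are reached with the same
 * instructions, so the single LEGACY_MODE body below serves
 * __csum_partial_copy_nocheck and the to_user/from_user entry points
 * alike; they are simply additional labels on the same code.  With EVA
 * enabled, separate bodies are built further down so that user-side
 * accesses use the e-variant instructions.
 */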
FEXPORT(__csum_partial_copy_nocheck)
EXPORT_SYMBOL(__csum_partial_copy_nocheck)
#ifndef CONFIG_EVA
FEXPORT(__csum_partial_copy_to_user)
EXPORT_SYMBOL(__csum_partial_copy_to_user)
FEXPORT(__csum_partial_copy_from_user)
EXPORT_SYMBOL(__csum_partial_copy_from_user)
#endif
__BUILD_CSUM_PARTIAL_COPY_USER LEGACY_MODE USEROP USEROP

#ifdef CONFIG_EVA
LEAF(__csum_partial_copy_to_user)
__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE KERNELOP USEROP
END(__csum_partial_copy_to_user)

LEAF(__csum_partial_copy_from_user)
__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE USEROP KERNELOP
END(__csum_partial_copy_from_user)
#endif