/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>

/*
 * Checksum copy with exception handling.
 * On an exception the routine bails out and returns 0 (see .Lfault);
 * no error pointers are taken and the destination is not zeroed here.
 *
 * Input
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
 *
 * Output
 * eax  32bit sum, folded down from the 64bit accumulation; 0 in case of
 *      an exception.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */
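
/*
 * For reference only: a C caller sees this routine roughly as sketched
 * below.  The declaration and the wrapper are illustrative assumptions
 * about the C side (the real prototype lives in the checksum headers),
 * not something defined in this file:
 *
 *	__wsum csum_partial_copy_generic(const void *src, void *dst, int len);
 *
 *	// hypothetical helper: 0 from the asm routine means "faulted"
 *	static bool copy_and_csum(const void *src, void *dst, int len,
 *				  __wsum *csum)
 *	{
 *		*csum = csum_partial_copy_generic(src, dst, len);
 *		return *csum != 0;
 *	}
 */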

	.macro source
10:
	_ASM_EXTABLE_UA(10b, .Lfault)
	.endm

	.macro dest
20:
	_ASM_EXTABLE_UA(20b, .Lfault)
	.endm
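
/*
 * Each use of the source/dest macros above drops a local label in front
 * of the very next memory access and records it in the exception table,
 * so a faulting load from the source or store to the destination jumps
 * to .Lfault instead of taking an unhandled fault.
 */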

SYM_FUNC_START(csum_partial_copy_generic)
	subq  $5*8, %rsp
	/* save the callee-saved registers we are about to clobber */
	movq  %rbx, 0*8(%rsp)
	movq  %r12, 1*8(%rsp)
	movq  %r14, 2*8(%rsp)
	movq  %r13, 3*8(%rsp)
	movq  %r15, 4*8(%rsp)

	/*
	 * Seed the sum with all ones: a one's-complement sum seeded this
	 * way never folds down to 0, so 0 is free to signal a fault
	 * (see .Lfault).
	 */
	movl  $-1, %eax
	xorl  %r9d, %r9d	/* %r9 stays zero; adc %r9 folds CF into the sum */
	movl  %edx, %ecx
	cmpl  $8, %ecx
	jb    .Lshort

	testb  $7, %sil		/* destination 8-byte aligned? */
	jne   .Lunaligned
.Laligned:
	movl  %ecx, %r12d

	shrq  $6, %r12		/* %r12 = number of 64-byte blocks */
	jz	.Lhandle_tail       /* < 64 */

	clc			/* clear CF before the adc chain below */

	/* main loop: checksum and copy in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11:	temp3, rdx: temp4, r12 loopcnt */
	/* r10:	temp5, r15: temp6, r14 temp7, r13 temp8 */
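	/*
	 * Each iteration loads eight quadwords, folds them into %rax with a
	 * single adc chain, and stores them to the destination.  decl and
	 * leaq are used instead of sub/add because they leave CF alone, so
	 * the carry out of the last adcq survives into the next iteration;
	 * the final carry is folded in by the adcq of %r9 (zero) after the
	 * loop.
	 */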
	.p2align 4
.Lloop:
	source
	movq  (%rdi), %rbx
	source
	movq  8(%rdi), %r8
	source
	movq  16(%rdi), %r11
	source
	movq  24(%rdi), %rdx

	source
	movq  32(%rdi), %r10
	source
	movq  40(%rdi), %r15
	source
	movq  48(%rdi), %r14
	source
	movq  56(%rdi), %r13

30:
	/*
	 * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
	 * potentially unmapped kernel address.
	 */
	_ASM_EXTABLE(30b, 2f)
	prefetcht0 5*64(%rdi)
2:
	adcq  %rbx, %rax
	adcq  %r8, %rax
	adcq  %r11, %rax
	adcq  %rdx, %rax
	adcq  %r10, %rax
	adcq  %r15, %rax
	adcq  %r14, %rax
	adcq  %r13, %rax

	decl %r12d

	dest
	movq %rbx, (%rsi)
	dest
	movq %r8, 8(%rsi)
	dest
	movq %r11, 16(%rsi)
	dest
	movq %rdx, 24(%rsi)

	dest
	movq %r10, 32(%rsi)
	dest
	movq %r15, 40(%rsi)
	dest
	movq %r14, 48(%rsi)
	dest
	movq %r13, 56(%rsi)

	leaq 64(%rdi), %rdi
	leaq 64(%rsi), %rsi

	jnz	.Lloop

	adcq  %r9, %rax		/* fold in the final carry from the loop */

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx:	count, rcx.63: the end result needs to be rol8 */
	movq %rcx, %r10
	andl $63, %ecx
	shrl $3, %ecx
	jz	.Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq (%rdi), %rbx
	adcq %rbx, %rax
	decl %ecx
	dest
	movq %rbx, (%rsi)
	leaq 8(%rsi), %rsi /* preserve carry */
	leaq 8(%rdi), %rdi
	jnz	.Lloop_8
	adcq %r9, %rax	/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl %eax, %ebx
	shrq $32, %rax
	addl %ebx, %eax
	adcl %r9d, %eax
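	/*
	 * Worked example of the fold above: with %rax = 0x00000002fffffffe,
	 * the low half 0xfffffffe plus the high half 0x00000002 wraps to 0
	 * with CF set, and the adcl of %r9d (zero) adds the end-around
	 * carry back, leaving the 32-bit sum 0x00000001 in %eax.
	 */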

	/* do last up to 6 bytes */
.Lhandle_7:
	movl %r10d, %ecx
	andl $7, %ecx
.L1:				/* .Lshort rejoins the common path here */
	shrl $1, %ecx
	jz   .Lhandle_1
	movl $2, %edx
	xorl %ebx, %ebx
	clc
	.p2align 4
.Lloop_1:
	source
	movw (%rdi), %bx
	adcl %ebx, %eax
	decl %ecx
	dest
	movw %bx, (%rsi)
	leaq 2(%rdi), %rdi
	leaq 2(%rsi), %rsi
	jnz .Lloop_1
	adcl %r9d, %eax	/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testb $1, %r10b
	jz    .Lende
	xorl  %ebx, %ebx
	source
	movb (%rdi), %bl
	dest
	movb %bl, (%rsi)
	addl %ebx, %eax
	adcl %r9d, %eax		/* carry */

.Lende:
	testq %r10, %r10
	js  .Lwas_odd
.Lout:
	movq 0*8(%rsp), %rbx
	movq 1*8(%rsp), %r12
	movq 2*8(%rsp), %r14
	movq 3*8(%rsp), %r13
	movq 4*8(%rsp), %r15
	addq $5*8, %rsp
	RET
.Lshort:
	movl %ecx, %r10d
	jmp  .L1
.Lunaligned:
	xorl %ebx, %ebx
	testb $1, %sil
	jne  .Lodd
1:	testb $2, %sil
	je   2f
	source
	movw (%rdi), %bx
	dest
	movw %bx, (%rsi)
	leaq 2(%rdi), %rdi
	subq $2, %rcx
	leaq 2(%rsi), %rsi
	addq %rbx, %rax
2:	testb $4, %sil
	je .Laligned
	source
	movl (%rdi), %ebx
	dest
	movl %ebx, (%rsi)
	leaq 4(%rdi), %rdi
	subq $4, %rcx
	leaq 4(%rsi), %rsi
	addq %rbx, %rax
	jmp .Laligned
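
	/*
	 * The block above peels off 1, 2 and/or 4 leading bytes until the
	 * destination is 8-byte aligned, adjusting %rcx as it goes, before
	 * rejoining the aligned path.  The single leading byte is handled
	 * at .Lodd below, because it also shifts the checksum byte lanes.
	 */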

.Lodd:
	source
	movb (%rdi), %bl
	dest
	movb %bl, (%rsi)
	leaq 1(%rdi), %rdi
	leaq 1(%rsi), %rsi
	/* decrement, set MSB */
	leaq -1(%rcx, %rcx), %rcx
	rorq $1, %rcx
	shll $8, %ebx
	addq %rbx, %rax
	jmp 1b
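	/*
	 * The leaq/rorq pair above computes %rcx = %rcx - 1 with bit 63 set:
	 * 2*%rcx - 1 always has bit 0 set, and rotating right by one moves
	 * that bit into the MSB while halving the rest.  That MSB, tested at
	 * .Lende, records that one leading byte was peeled off, so the rest
	 * of the data was summed with its byte lanes swapped; the shll $8
	 * puts the first byte into the matching lane, and .Lwas_odd rotates
	 * the final sum by 8 to undo the swap.
	 */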

.Lwas_odd:
	roll $8, %eax
	jmp .Lout

	/* Exception: just return 0 */
.Lfault:
	xorl %eax, %eax
	jmp  .Lout
SYM_FUNC_END(csum_partial_copy_generic)