18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
38c2ecf20Sopenharmony_ci *
48c2ecf20Sopenharmony_ci * This file is subject to the terms and conditions of the GNU General Public
58c2ecf20Sopenharmony_ci * License.  See the file COPYING in the main directory of this archive
68c2ecf20Sopenharmony_ci * for more details. No warranty for anything given at all.
78c2ecf20Sopenharmony_ci */
88c2ecf20Sopenharmony_ci#include <linux/linkage.h>
98c2ecf20Sopenharmony_ci#include <asm/errno.h>
108c2ecf20Sopenharmony_ci#include <asm/asm.h>
118c2ecf20Sopenharmony_ci
/*
 * Checksum copy with exception handling.
 * On a fault while reading the source or writing the destination this
 * routine stops copying and returns 0. It does not set any error pointer
 * and does not zero the destination itself.
 *
 * Input
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
 *
 * Output
 * eax  sum folded to 32 bits (accumulation starts from ~0).
 *      0 in case of exception.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */
288c2ecf20Sopenharmony_ci
	/*
	 * Tag the instruction that follows as a source-buffer access: drops
	 * local label 10 at the current position and records an exception
	 * table entry for it (_ASM_EXTABLE_UA) whose fixup target is .Lfault.
	 * Must be placed immediately before the instruction that may fault.
	 */
	.macro source
10:
	_ASM_EXTABLE_UA(10b, .Lfault)
	.endm
338c2ecf20Sopenharmony_ci
	/*
	 * Tag the instruction that follows as a destination-buffer access:
	 * drops local label 20 at the current position and records an
	 * exception table entry for it (_ASM_EXTABLE_UA) whose fixup target
	 * is .Lfault. Must be placed immediately before the instruction that
	 * may fault.
	 */
	.macro dest
20:
	_ASM_EXTABLE_UA(20b, .Lfault)
	.endm
388c2ecf20Sopenharmony_ci
/*
 * csum_partial_copy_generic - copy a buffer while summing it.
 *
 * In:    rdi = source, rsi = destination, edx = length in bytes (32 bit)
 * Out:   eax = sum folded to 32 bits, or 0 if a tagged source/destination
 *        access faulted (see .Lfault)
 * Saved: rbx, r12-r15 are used as temporaries and are saved on the stack
 *        on entry and restored at .Lout.
 * Clobb: rcx, rdx, rdi, rsi, r8-r11, flags
 *
 * The sum is accumulated with adc chains, so between the adc instructions
 * only instructions that leave CF untouched (mov, lea, dec, prefetch) are
 * used. r9 is kept at zero so that "adc %r9, ..." adds in just the carry.
 */
SYM_FUNC_START(csum_partial_copy_generic)
	/* save the callee-saved registers used as temporaries below */
	subq  $5*8, %rsp
	movq  %rbx, 0*8(%rsp)
	movq  %r12, 1*8(%rsp)
	movq  %r14, 2*8(%rsp)
	movq  %r13, 3*8(%rsp)
	movq  %r15, 4*8(%rsp)

	movl  $-1, %eax		/* running sum starts as all ones */
	xorl  %r9d, %r9d	/* r9 = 0, used to add in the carry */
	movl  %edx, %ecx	/* ecx = len */
	cmpl  $8, %ecx
	jb    .Lshort		/* < 8 bytes: words and a last byte only */

	testb  $7, %sil
	jne   .Lunaligned	/* destination is not 8 byte aligned */
.Laligned:
	movl  %ecx, %r12d

	shrq  $6, %r12		/* r12 = number of 64 byte blocks */
	jz	.Lhandle_tail       /* < 64 */

	clc

	/* main loop. copy and sum in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11:	temp3, rdx: temp4, r12 loopcnt */
	/* r10:	temp5, r15: temp6, r14 temp7, r13 temp8 */
	.p2align 4
.Lloop:
	source
	movq  (%rdi), %rbx
	source
	movq  8(%rdi), %r8
	source
	movq  16(%rdi), %r11
	source
	movq  24(%rdi), %rdx

	source
	movq  32(%rdi), %r10
	source
	movq  40(%rdi), %r15
	source
	movq  48(%rdi), %r14
	source
	movq  56(%rdi), %r13

30:
	/*
	 * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
	 * potentially unmapped kernel address.
	 */
	_ASM_EXTABLE(30b, 2f)
	prefetcht0 5*64(%rdi)
2:
	/* CF is carried from one adc to the next and into the next iteration */
	adcq  %rbx, %rax
	adcq  %r8, %rax
	adcq  %r11, %rax
	adcq  %rdx, %rax
	adcq  %r10, %rax
	adcq  %r15, %rax
	adcq  %r14, %rax
	adcq  %r13, %rax

	/*
	 * dec sets ZF for the jnz below but leaves CF alone; the mov and lea
	 * instructions in between do not change any flags.
	 */
	decl %r12d

	dest
	movq %rbx, (%rsi)
	dest
	movq %r8, 8(%rsi)
	dest
	movq %r11, 16(%rsi)
	dest
	movq %rdx, 24(%rsi)

	dest
	movq %r10, 32(%rsi)
	dest
	movq %r15, 40(%rsi)
	dest
	movq %r14, 48(%rsi)
	dest
	movq %r13, 56(%rsi)

	leaq 64(%rdi), %rdi
	leaq 64(%rsi), %rsi

	jnz	.Lloop

	adcq  %r9, %rax		/* add in the carry of the last block */

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx:	count, rcx.63: the end result needs to be rol8 */
	movq %rcx, %r10		/* r10 keeps the length and the rcx.63 flag */
	andl $63, %ecx
	shrl $3, %ecx		/* ecx = number of remaining 8 byte words */
	jz	.Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq (%rdi), %rbx
	adcq %rbx, %rax
	decl %ecx		/* sets ZF for jnz, keeps CF */
	dest
	movq %rbx, (%rsi)
	leaq 8(%rsi), %rsi /* preserve carry */
	leaq 8(%rdi), %rdi
	jnz	.Lloop_8
	adcq %r9, %rax	/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl %eax, %ebx
	shrq $32, %rax
	addl %ebx, %eax
	adcl %r9d, %eax

	/* do last up to 6 bytes */
.Lhandle_7:
	movl %r10d, %ecx
	andl $7, %ecx
.L1:				/* .Lshort rejoins the common path here */
	shrl $1, %ecx		/* ecx = number of remaining 16 bit words */
	jz   .Lhandle_1
	xorl %ebx, %ebx		/* movw below only writes bx; keep the rest 0 */
	clc
	.p2align 4
.Lloop_1:
	source
	movw (%rdi), %bx
	adcl %ebx, %eax
	decl %ecx		/* sets ZF for jnz, keeps CF */
	dest
	movw %bx, (%rsi)
	leaq 2(%rdi), %rdi
	leaq 2(%rsi), %rsi
	jnz .Lloop_1
	adcl %r9d, %eax	/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testb $1, %r10b
	jz    .Lende
	xorl  %ebx, %ebx
	source
	movb (%rdi), %bl
	dest
	movb %bl, (%rsi)
	addl %ebx, %eax
	adcl %r9d, %eax		/* carry */

.Lende:
	/* bit 63 of r10 is set only by .Lodd (odd destination address) */
	testq %r10, %r10
	js  .Lwas_odd
.Lout:
	movq 0*8(%rsp), %rbx
	movq 1*8(%rsp), %r12
	movq 2*8(%rsp), %r14
	movq 3*8(%rsp), %r13
	movq 4*8(%rsp), %r15
	addq $5*8, %rsp
	RET
.Lshort:
	movl %ecx, %r10d	/* 32 bit move: bit 63 of r10 ends up clear */
	jmp  .L1
.Lunaligned:
	/* copy 1, 2 and/or 4 bytes until the destination is 8 byte aligned */
	xorl %ebx, %ebx
	testb $1, %sil
	jne  .Lodd
1:	testb $2, %sil
	je   2f
	source
	movw (%rdi), %bx
	dest
	movw %bx, (%rsi)
	leaq 2(%rdi), %rdi
	subq $2, %rcx
	leaq 2(%rsi), %rsi
	addq %rbx, %rax
2:	testb $4, %sil
	je .Laligned
	source
	movl (%rdi), %ebx
	dest
	movl %ebx, (%rsi)
	leaq 4(%rdi), %rdi
	subq $4, %rcx
	leaq 4(%rsi), %rsi
	addq %rbx, %rax
	jmp .Laligned

.Lodd:
	source
	movb (%rdi), %bl
	dest
	movb %bl, (%rsi)
	leaq 1(%rdi), %rdi
	leaq 1(%rsi), %rsi
	/*
	 * decrement, set MSB: rcx = 2*rcx - 1, then rotating right by one
	 * moves the (set) low bit into bit 63 and leaves rcx - 1 below it.
	 */
	leaq -1(%rcx, %rcx), %rcx
	rorq $1, %rcx
	shll $8, %ebx		/* leading byte is summed in the upper byte lane */
	addq %rbx, %rax
	jmp 1b

.Lwas_odd:
	roll $8, %eax		/* undo the byte lane swap of the odd start */
	jmp .Lout

	/* Exception: just return 0 */
.Lfault:
	xorl %eax, %eax
	jmp  .Lout
SYM_FUNC_END(csum_partial_copy_generic)
257