/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>

/*
 * Checksum copy with exception handling.
 *
 * x86-64, AT&T syntax, run through the C preprocessor.
 *
 * Copies len bytes from source to destination and, in the same pass,
 * accumulates an end-around-carry (ones' complement) sum of the copied
 * data.  The accumulator is seeded with 0xffffffff.
 *
 * Every load from the source and every store to the destination is
 * annotated (see the source/dest macros below) with an exception-table
 * entry that redirects a fault to .Lfault, which returns 0.  Nothing in
 * this function zeroes the destination after a fault; if callers need
 * that, they have to do it themselves.
 *
 * Input
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
 *
 * Output
 * eax  sum folded to 32 bits, or 0 if an access faulted.
 *      NOTE(review): the 0xffffffff seed presumably exists so that 0 is
 *      unambiguous as the fault indication - confirm against the callers.
 *
 * Clobbers
 * rcx, rdx, rdi, rsi, r8-r11 and the flags.  rbx and r12-r15 are used as
 * well but are saved on the stack and restored before returning.
 *
 * Alignment
 * For len >= 8 a destination that is not 8-byte aligned is handled here
 * (.Lunaligned) by copying 1, 2 and/or 4 leading bytes first.  The source
 * is read at whatever alignment that leaves it with.
 */

	/*
	 * Put directly in front of an instruction that reads the source
	 * buffer.  Defines local label 10 at that instruction and records an
	 * exception-table entry sending a fault at it to .Lfault.
	 */
	.macro source
10:
	_ASM_EXTABLE_UA(10b, .Lfault)
	.endm

	/*
	 * Same as "source", for an instruction that writes the destination
	 * buffer (local label 20).
	 */
	.macro dest
20:
	_ASM_EXTABLE_UA(20b, .Lfault)
	.endm

SYM_FUNC_START(csum_partial_copy_generic)
	/* Save the callee-saved registers used as temporaries below. */
	subq  $5*8, %rsp
	movq  %rbx, 0*8(%rsp)
	movq  %r12, 1*8(%rsp)
	movq  %r14, 2*8(%rsp)
	movq  %r13, 3*8(%rsp)
	movq  %r15, 4*8(%rsp)

	movl  $-1, %eax			/* rax = 0xffffffff: initial sum */
	xorl  %r9d, %r9d		/* r9 = 0, used to add in CF via adc */
	movl  %edx, %ecx		/* rcx = len, zero-extended */
	cmpl  $8, %ecx
	jb    .Lshort

	testb  $7, %sil
	jne   .Lunaligned
.Laligned:
	/* rsi is 8-byte aligned here; ecx = bytes still to copy */
	movl  %ecx, %r12d

	shrq  $6, %r12			/* r12 = number of 64 byte blocks */
	jz	.Lhandle_tail       /* < 64 */

	clc

	/* main loop. copy and sum in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11:	temp3, rdx: temp4, r12 loopcnt */
	/* r10:	temp5, r15: temp6, r14 temp7, r13 temp8 */
	/*
	 * CF is live across iterations: the carry out of the last adcq of one
	 * pass is consumed by the first adcq of the next.  Nothing between the
	 * adcq chain and the top of the loop may modify CF.
	 */
	.p2align 4
.Lloop:
	source
	movq  (%rdi), %rbx
	source
	movq  8(%rdi), %r8
	source
	movq  16(%rdi), %r11
	source
	movq  24(%rdi), %rdx

	source
	movq  32(%rdi), %r10
	source
	movq  40(%rdi), %r15
	source
	movq  48(%rdi), %r14
	source
	movq  56(%rdi), %r13

30:
	/*
	 * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
	 * potentially unmapped kernel address.
	 */
	_ASM_EXTABLE(30b, 2f)
	prefetcht0 5*64(%rdi)
2:
	adcq  %rbx, %rax
	adcq  %r8, %rax
	adcq  %r11, %rax
	adcq  %rdx, %rax
	adcq  %r10, %rax
	adcq  %r15, %rax
	adcq  %r14, %rax
	adcq  %r13, %rax

	/*
	 * decl sets ZF for the jnz below but leaves CF alone.  The movq and
	 * leaq instructions in between change no flags at all.
	 */
	decl %r12d

	dest
	movq %rbx, (%rsi)
	dest
	movq %r8, 8(%rsi)
	dest
	movq %r11, 16(%rsi)
	dest
	movq %rdx, 24(%rsi)

	dest
	movq %r10, 32(%rsi)
	dest
	movq %r15, 40(%rsi)
	dest
	movq %r14, 48(%rsi)
	dest
	movq %r13, 56(%rsi)

	leaq 64(%rdi), %rdi
	leaq 64(%rsi), %rsi

	jnz	.Lloop

	adcq  %r9, %rax			/* add in the last carry */

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx:	count, rcx.63: the end result needs to be rol8 */
	movq %rcx, %r10			/* r10 keeps count and rol8 flag */
	andl $63, %ecx
	shrl $3, %ecx			/* ecx = remaining 8 byte words */
	jz	.Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq (%rdi), %rbx
	adcq %rbx, %rax
	decl %ecx			/* sets ZF, keeps CF */
	dest
	movq %rbx, (%rsi)
	leaq 8(%rsi), %rsi /* preserve carry */
	leaq 8(%rdi), %rdi
	jnz	.Lloop_8
	adcq %r9, %rax	/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl %eax, %ebx
	shrq $32, %rax
	addl %ebx, %eax
	adcl %r9d, %eax

	/* do last up to 6 bytes */
.Lhandle_7:
	movl %r10d, %ecx
	andl $7, %ecx
.L1:				/* .Lshort rejoins the common path here */
	shrl $1, %ecx			/* ecx = remaining 16 bit words */
	jz   .Lhandle_1
	xorl %ebx, %ebx			/* so movw below yields a clean ebx */
	clc
	.p2align 4
.Lloop_1:
	source
	movw (%rdi), %bx
	adcl %ebx, %eax
	decl %ecx			/* sets ZF, keeps CF */
	dest
	movw %bx, (%rsi)
	leaq 2(%rdi), %rdi
	leaq 2(%rsi), %rsi
	jnz .Lloop_1
	adcl %r9d, %eax	/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testb $1, %r10b
	jz    .Lende
	xorl  %ebx, %ebx
	source
	movb (%rdi), %bl
	dest
	movb %bl, (%rsi)
	addl %ebx, %eax
	adcl %r9d, %eax		/* carry */

.Lende:
	/* bit 63 of r10 is set only by .Lodd (destination started odd) */
	testq %r10, %r10
	js  .Lwas_odd
.Lout:
	movq 0*8(%rsp), %rbx
	movq 1*8(%rsp), %r12
	movq 2*8(%rsp), %r14
	movq 3*8(%rsp), %r13
	movq 4*8(%rsp), %r15
	addq $5*8, %rsp
	RET

	/* len < 8: no alignment step, go straight to the word/byte tail */
.Lshort:
	movl %ecx, %r10d
	jmp  .L1

	/*
	 * len >= 8 and rsi not 8-byte aligned: copy 1, 2 and/or 4 bytes,
	 * adding them to the sum, until rsi is aligned, then continue at
	 * .Laligned with rcx reduced accordingly.
	 */
.Lunaligned:
	xorl %ebx, %ebx
	testb $1, %sil
	jne  .Lodd
1:	testb $2, %sil
	je   2f
	source
	movw (%rdi), %bx
	dest
	movw %bx, (%rsi)
	leaq 2(%rdi), %rdi
	subq $2, %rcx
	leaq 2(%rsi), %rsi
	addq %rbx, %rax
2:	testb $4, %sil
	je .Laligned
	source
	movl (%rdi), %ebx
	dest
	movl %ebx, (%rsi)
	leaq 4(%rdi), %rdi
	subq $4, %rcx
	leaq 4(%rsi), %rsi
	addq %rbx, %rax
	jmp .Laligned

	/*
	 * Destination starts on an odd address: copy one byte, add it to the
	 * sum shifted left by 8, and flag in rcx.63 that the final result
	 * must be rotated by 8 (see .Lwas_odd).
	 */
.Lodd:
	source
	movb (%rdi), %bl
	dest
	movb %bl, (%rsi)
	leaq 1(%rdi), %rdi
	leaq 1(%rsi), %rsi
	/* decrement, set MSB */
	leaq -1(%rcx, %rcx), %rcx
	rorq $1, %rcx
	shll $8, %ebx
	addq %rbx, %rax
	jmp 1b

.Lwas_odd:
	roll $8, %eax
	jmp .Lout

	/* Exception: just return 0 */
.Lfault:
	xorl %eax, %eax
	jmp  .Lout
SYM_FUNC_END(csum_partial_copy_generic)