/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>

/*
 * Checksum copy with exception handling.
 *
 * Syntax: GNU as, AT&T operand order (source first, destination last).
 *
 * Copies len bytes from source to destination while accumulating an
 * end-around-carry sum of the copied data.  The sum is seeded with -1.
 * Every load from the source and every store to the destination is
 * covered by an exception-table entry that redirects to .Lfault, which
 * returns 0 in eax.  This routine itself neither zeroes the destination
 * nor stores an error code anywhere.
 *
 * Input
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
 *
 * Output
 * eax  sum folded to 32 bits; 0 if a load or store faulted.
 *
 * Clobbers
 * rcx, rdx, rdi, rsi, r8-r11 and the flags.  rbx and r12-r15 are used
 * as temporaries but are saved on the stack and restored before return.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.  A destination
 * that is not 8-byte aligned is nevertheless handled here (.Lunaligned);
 * the alignment of the source is never tested.
 *
 * NOTE(review): seeding the sum with -1 presumably guarantees that a
 * successful result is never 0, so that callers can tell it apart from
 * the fault return -- confirm against the C wrappers.
 */

	/*
	 * "source" / "dest" are placed on the line before a load from the
	 * source or a store to the destination.  Each one drops a numeric
	 * label and registers it with _ASM_EXTABLE_UA so that a fault is
	 * redirected to .Lfault.
	 * NOTE(review): this relies on _ASM_EXTABLE_UA (asm/asm.h, not
	 * visible here) emitting nothing into the current section, so that
	 * the label ends up on the instruction that follows -- confirm.
	 */
	.macro source
10:
	_ASM_EXTABLE_UA(10b, .Lfault)
	.endm

	.macro dest
20:
	_ASM_EXTABLE_UA(20b, .Lfault)
	.endm

SYM_FUNC_START(csum_partial_copy_generic)
	/* Save the callee-saved registers used as temporaries below. */
	subq	$5*8, %rsp
	movq	%rbx, 0*8(%rsp)
	movq	%r12, 1*8(%rsp)
	movq	%r14, 2*8(%rsp)
	movq	%r13, 3*8(%rsp)
	movq	%r15, 4*8(%rsp)

	movl	$-1, %eax		/* initial sum (rax = 0x00000000ffffffff) */
	xorl	%r9d, %r9d		/* r9 stays zero: used to add in a carry */
	movl	%edx, %ecx		/* rcx = len, zero-extended */
	cmpl	$8, %ecx
	jb	.Lshort			/* fewer than 8 bytes: word/byte tail only */

	testb	$7, %sil
	jne	.Lunaligned		/* destination is not 8-byte aligned */
.Laligned:
	movl	%ecx, %r12d		/* low 32 bits only: drops the rcx.63 flag */

	shrq	$6, %r12		/* r12 = number of 64 byte blocks */
	jz	.Lhandle_tail		/* < 64 */

	clc

	/* main loop. copy and sum in 64 byte blocks */
	/* r9:	zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11:	temp3, rdx: temp4, r12 loopcnt */
	/* r10:	temp5, r15: temp6, r14 temp7, r13 temp8 */
	/*
	 * CF is carried from one iteration into the next: after the adcq
	 * chain only decl, movq and leaq are executed, none of which
	 * modifies CF.
	 */
	.p2align 4
.Lloop:
	source
	movq	(%rdi), %rbx
	source
	movq	8(%rdi), %r8
	source
	movq	16(%rdi), %r11
	source
	movq	24(%rdi), %rdx

	source
	movq	32(%rdi), %r10
	source
	movq	40(%rdi), %r15
	source
	movq	48(%rdi), %r14
	source
	movq	56(%rdi), %r13

30:
	/*
	 * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
	 * potentially unmapped kernel address.
	 */
	_ASM_EXTABLE(30b, 2f)
	prefetcht0 5*64(%rdi)
2:
	adcq	%rbx, %rax
	adcq	%r8, %rax
	adcq	%r11, %rax
	adcq	%rdx, %rax
	adcq	%r10, %rax
	adcq	%r15, %rax
	adcq	%r14, %rax
	adcq	%r13, %rax

	decl	%r12d			/* sets ZF for the jnz below, leaves CF alone */

	dest
	movq	%rbx, (%rsi)
	dest
	movq	%r8, 8(%rsi)
	dest
	movq	%r11, 16(%rsi)
	dest
	movq	%rdx, 24(%rsi)

	dest
	movq	%r10, 32(%rsi)
	dest
	movq	%r15, 40(%rsi)
	dest
	movq	%r14, 48(%rsi)
	dest
	movq	%r13, 56(%rsi)

	leaq	64(%rdi), %rdi		/* lea: advance without touching flags */
	leaq	64(%rsi), %rsi

	jnz	.Lloop			/* ZF still comes from the decl above */

	adcq	%r9, %rax		/* add in the last carry */

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx:	count, rcx.63: the end result needs to be rol8 */
	movq	%rcx, %r10		/* keep length and rcx.63 flag for later */
	andl	$63, %ecx
	shrl	$3, %ecx		/* ecx = remaining whole quadwords (0..7) */
	jz	.Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq	(%rdi), %rbx
	adcq	%rbx, %rax
	decl	%ecx
	dest
	movq	%rbx, (%rsi)
	leaq	8(%rsi), %rsi		/* preserve carry */
	leaq	8(%rdi), %rdi
	jnz	.Lloop_8
	adcq	%r9, %rax		/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl	%eax, %ebx
	shrq	$32, %rax
	addl	%ebx, %eax
	adcl	%r9d, %eax

	/* do last up to 6 bytes */
.Lhandle_7:
	movl	%r10d, %ecx
	andl	$7, %ecx
.L1:				/* .Lshort rejoins the common path here */
	shrl	$1, %ecx		/* ecx = remaining whole 16-bit words */
	jz	.Lhandle_1
	xorl	%ebx, %ebx		/* clear bits 16..31: movw only writes bx */
	clc
	.p2align 4
.Lloop_1:
	source
	movw	(%rdi), %bx
	adcl	%ebx, %eax
	decl	%ecx
	dest
	movw	%bx, (%rsi)
	leaq	2(%rdi), %rdi
	leaq	2(%rsi), %rsi
	jnz	.Lloop_1
	adcl	%r9d, %eax		/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testb	$1, %r10b
	jz	.Lende
	xorl	%ebx, %ebx
	source
	movb	(%rdi), %bl
	dest
	movb	%bl, (%rsi)
	addl	%ebx, %eax
	adcl	%r9d, %eax		/* carry */

.Lende:
	testq	%r10, %r10
	js	.Lwas_odd		/* bit 63 set: destination started odd */
.Lout:
	/* Common exit: restore the saved registers and return eax. */
	movq	0*8(%rsp), %rbx
	movq	1*8(%rsp), %r12
	movq	2*8(%rsp), %r14
	movq	3*8(%rsp), %r13
	movq	4*8(%rsp), %r15
	addq	$5*8, %rsp
	RET

	/* len < 8: r10 = len with bit 63 clear, then only words and a byte */
.Lshort:
	movl	%ecx, %r10d
	jmp	.L1

	/*
	 * Destination not 8-byte aligned: copy a leading byte, word and/or
	 * dword until rsi is aligned, adding each piece to the sum.  len is
	 * at least 8 here, so at most 7 bytes are consumed.
	 */
.Lunaligned:
	xorl	%ebx, %ebx
	testb	$1, %sil
	jne	.Lodd
1:	testb	$2, %sil
	je	2f
	source
	movw	(%rdi), %bx
	dest
	movw	%bx, (%rsi)
	leaq	2(%rdi), %rdi
	subq	$2, %rcx
	leaq	2(%rsi), %rsi
	addq	%rbx, %rax
2:	testb	$4, %sil
	je	.Laligned
	source
	movl	(%rdi), %ebx
	dest
	movl	%ebx, (%rsi)
	leaq	4(%rdi), %rdi
	subq	$4, %rcx
	leaq	4(%rsi), %rsi
	addq	%rbx, %rax
	jmp	.Laligned

	/* Destination address is odd: copy one byte and remember it. */
.Lodd:
	source
	movb	(%rdi), %bl
	dest
	movb	%bl, (%rsi)
	leaq	1(%rdi), %rdi
	leaq	1(%rsi), %rsi
	/*
	 * decrement, set MSB:
	 * 2*rcx - 1 is odd, so rotating right by one moves that 1 into
	 * bit 63 and leaves rcx - 1 in the remaining bits.
	 */
	leaq	-1(%rcx, %rcx), %rcx
	rorq	$1, %rcx
	shll	$8, %ebx		/* the odd byte is summed shifted up by 8 */
	addq	%rbx, %rax
	jmp	1b

	/* Destination started on an odd address: rotate the result by 8. */
.Lwas_odd:
	roll	$8, %eax
	jmp	.Lout

	/* Exception: just return 0 */
.Lfault:
	xorl	%eax, %eax
	jmp	.Lout
SYM_FUNC_END(csum_partial_copy_generic)