/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>

/*
 * Checksum copy with exception handling.
 *
 * Syntax: AT&T (GAS), run through the C preprocessor.
 *
 * Copies len bytes from source to destination while accumulating an
 * end-around-carry sum of the copied data.  Every load/store annotated
 * with the source/dest macros below has an exception table entry whose
 * fixup target is .Lfault, which returns 0 in eax.
 *
 * Input
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
 *
 * Output
 * eax  sum folded to 32 bits; 0 if an exception was taken.
 *
 * The sum is seeded with ~0, so a successful run presumably never
 * produces 0 and 0 can serve as the fault indicator (confirm against the
 * callers).  This routine does not zero the destination and does not
 * report -EFAULT itself.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 *
 * Clobbers: rcx, rdx, rdi, rsi, r8-r11, flags.
 * rbx and r12-r15 are used as temporaries and saved/restored on the stack.
 */

	/*
	 * Place directly in front of an instruction that reads the source
	 * buffer: records an exception table entry for it with .Lfault as
	 * the fixup.
	 */
	.macro source
10:
	_ASM_EXTABLE_UA(10b, .Lfault)
	.endm

	/*
	 * Place directly in front of an instruction that writes the
	 * destination buffer: records an exception table entry for it with
	 * .Lfault as the fixup.
	 */
	.macro dest
20:
	_ASM_EXTABLE_UA(20b, .Lfault)
	.endm

SYM_FUNC_START(csum_partial_copy_generic)
	/* save the callee-saved registers used as temporaries */
	subq	$5*8, %rsp
	movq	%rbx, 0*8(%rsp)
	movq	%r12, 1*8(%rsp)
	movq	%r14, 2*8(%rsp)
	movq	%r13, 3*8(%rsp)
	movq	%r15, 4*8(%rsp)

	movl	$-1, %eax		/* seed the sum with ~0 */
	xorl	%r9d, %r9d		/* r9 stays zero: used to add in the carry */
	movl	%edx, %ecx		/* rcx = remaining length */
	cmpl	$8, %ecx
	jb	.Lshort

	testb	$7, %sil		/* destination 8-byte aligned? */
	jne	.Lunaligned
.Laligned:
	movl	%ecx, %r12d		/* drops the odd flag in rcx bit 63 */

	shrq	$6, %r12		/* r12 = number of 64 byte blocks */
	jz	.Lhandle_tail		/* < 64 */

	clc

	/* main loop. copy and sum in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11: temp3, rdx: temp4, r12 loopcnt */
	/* r10: temp5, r15: temp6, r14 temp7, r13 temp8 */
	.p2align 4
.Lloop:
	source
	movq	(%rdi), %rbx
	source
	movq	8(%rdi), %r8
	source
	movq	16(%rdi), %r11
	source
	movq	24(%rdi), %rdx

	source
	movq	32(%rdi), %r10
	source
	movq	40(%rdi), %r15
	source
	movq	48(%rdi), %r14
	source
	movq	56(%rdi), %r13

30:
	/*
	 * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
	 * potentially unmapped kernel address.
	 */
	_ASM_EXTABLE(30b, 2f)
	prefetcht0 5*64(%rdi)
2:
	/* CF is carried from the previous iteration into this chain */
	adcq	%rbx, %rax
	adcq	%r8, %rax
	adcq	%r11, %rax
	adcq	%rdx, %rax
	adcq	%r10, %rax
	adcq	%r15, %rax
	adcq	%r14, %rax
	adcq	%r13, %rax

	/* dec sets ZF for the jnz below but leaves CF untouched */
	decl	%r12d

	dest
	movq	%rbx, (%rsi)
	dest
	movq	%r8, 8(%rsi)
	dest
	movq	%r11, 16(%rsi)
	dest
	movq	%rdx, 24(%rsi)

	dest
	movq	%r10, 32(%rsi)
	dest
	movq	%r15, 40(%rsi)
	dest
	movq	%r14, 48(%rsi)
	dest
	movq	%r13, 56(%rsi)

	/* lea instead of add: must not disturb CF/ZF */
	leaq	64(%rdi), %rdi
	leaq	64(%rsi), %rsi

	jnz	.Lloop

	adcq	%r9, %rax		/* add in the final carry */

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx: count, rcx.63: the end result needs to be rol8 */
	movq	%rcx, %r10		/* r10 keeps length and odd flag */
	andl	$63, %ecx
	shrl	$3, %ecx		/* ecx = remaining quadwords */
	jz	.Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq	(%rdi), %rbx
	adcq	%rbx, %rax
	decl	%ecx			/* preserves CF */
	dest
	movq	%rbx, (%rsi)
	leaq	8(%rsi), %rsi		/* preserve carry */
	leaq	8(%rdi), %rdi
	jnz	.Lloop_8
	adcq	%r9, %rax		/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl	%eax, %ebx
	shrq	$32, %rax
	addl	%ebx, %eax
	adcl	%r9d, %eax

	/* do last up to 6 bytes */
.Lhandle_7:
	movl	%r10d, %ecx
	andl	$7, %ecx
.L1:				/* .Lshort rejoins the common path here */
	shrl	$1, %ecx		/* ecx = remaining 16bit words */
	jz	.Lhandle_1
	xorl	%ebx, %ebx		/* upper half of ebx must be 0 for adcl */
	clc
	.p2align 4
.Lloop_1:
	source
	movw	(%rdi), %bx
	adcl	%ebx, %eax
	decl	%ecx			/* preserves CF */
	dest
	movw	%bx, (%rsi)
	leaq	2(%rdi), %rdi
	leaq	2(%rsi), %rsi
	jnz	.Lloop_1
	adcl	%r9d, %eax		/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testb	$1, %r10b
	jz	.Lende
	xorl	%ebx, %ebx
	source
	movb	(%rdi), %bl
	dest
	movb	%bl, (%rsi)
	addl	%ebx, %eax
	adcl	%r9d, %eax		/* carry */

.Lende:
	/* bit 63 of r10 is set when .Lodd consumed a leading byte */
	testq	%r10, %r10
	js	.Lwas_odd
.Lout:
	movq	0*8(%rsp), %rbx
	movq	1*8(%rsp), %r12
	movq	2*8(%rsp), %r14
	movq	3*8(%rsp), %r13
	movq	4*8(%rsp), %r15
	addq	$5*8, %rsp
	RET

	/* len < 8: skip alignment and the quadword loops */
.Lshort:
	movl	%ecx, %r10d		/* zero-extends: odd flag is clear */
	jmp	.L1

	/* bring the destination up to 8 byte alignment (len >= 8 here) */
.Lunaligned:
	xorl	%ebx, %ebx
	testb	$1, %sil
	jne	.Lodd
1:	testb	$2, %sil
	je	2f
	source
	movw	(%rdi), %bx
	dest
	movw	%bx, (%rsi)
	leaq	2(%rdi), %rdi
	subq	$2, %rcx
	leaq	2(%rsi), %rsi
	addq	%rbx, %rax
2:	testb	$4, %sil
	je	.Laligned
	source
	movl	(%rdi), %ebx
	dest
	movl	%ebx, (%rsi)
	leaq	4(%rdi), %rdi
	subq	$4, %rcx
	leaq	4(%rsi), %rsi
	addq	%rbx, %rax
	jmp	.Laligned

	/* destination starts on an odd address: copy one byte first */
.Lodd:
	source
	movb	(%rdi), %bl
	dest
	movb	%bl, (%rsi)
	leaq	1(%rdi), %rdi
	leaq	1(%rsi), %rsi
	/* decrement, set MSB: rcx = (rcx - 1) | (1 << 63) */
	leaq	-1(%rcx, %rcx), %rcx
	rorq	$1, %rcx
	/*
	 * Add the leading byte in the upper byte lane; the remaining data
	 * is summed shifted by one byte and .Lwas_odd rotates the result.
	 */
	shll	$8, %ebx
	addq	%rbx, %rax
	jmp	1b

	/* undo the one byte shift introduced by .Lodd */
.Lwas_odd:
	roll	$8, %eax
	jmp	.Lout

	/* Exception: just return 0 */
.Lfault:
	xorl	%eax, %eax
	jmp	.Lout
SYM_FUNC_END(csum_partial_copy_generic)