/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Optimized version of the ip_fast_csum() function
 * Used for calculating IP header checksum
 *
 * Return: 16bit checksum, complemented
 *
 * Inputs:
 *	in0: address of buffer to checksum (char *)
 *	in1: length of the buffer, counted in 32-bit words (int); the fast
 *	     path is taken when it equals 5 (20 bytes), and the generic path
 *	     shifts it left by 2 before passing it to do_csum
 *
 * Copyright (C) 2002, 2006 Intel Corp.
 * Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com>
 */

#include <asm/asmmacro.h>
#include <asm/export.h>

/*
 * Since we know that most likely this function is called with buf aligned
 * on 4-byte boundary and 20 bytes in length, we can execute rather quickly
 * versus calling generic version of do_csum, which has lots of overhead in
 * handling various alignments and sizes.  However, due to lack of constraints
 * put on the function input argument, cases with alignment not on 4-byte or
 * size not equal to 20 bytes will be handled by the generic do_csum function.
 */

/*
 * The leaf paths below never execute alloc; these names give direct access
 * to the stacked input registers r32-r36 and to the return register r8.
 */
#define in0	r32
#define in1	r33
#define in2	r34
#define in3	r35
#define in4	r36
#define ret0	r8

/*
 * ip_fast_csum(in0 = buffer, in1 = length in 32-bit words)
 *
 * Fast path (in1 == 5 and in0 4-byte aligned): loads the five words through
 * two interleaved pointers (in0 -> offsets 0, 8, 16; r15 -> offsets 4, 12),
 * adds them in 64 bits, folds the total down to 16 bits and returns its
 * complement masked to 16 bits.
 *
 * Generic path: calls do_csum(in0, in1 << 2) and returns the bitwise
 * complement of its result.
 *
 * Scratch registers written: r8, r9, r14, r15, r20-r24, p6, p7
 * (the generic path additionally allocates a frame and uses r34/r35).
 */
GLOBAL_ENTRY(ip_fast_csum)
	.prologue
	.body
	cmp.ne	p6,p7=5,in1	// size other than 20 byte?
	and	r14=3,in0	// is it aligned on 4-byte?
	add	r15=4,in0	// second source pointer
	;;
	// If the address is misaligned force p6=1, p7=0; otherwise keep the
	// result of the length compare above.
	cmp.ne.or.andcm p6,p7=r14,r0
	;;
	// The first two loads are predicated on p7, so they are skipped when
	// the branch to .generic is taken.
(p7)	ld4	r20=[in0],8
(p7)	ld4	r21=[r15],8
(p6)	br.spnt	.generic
	;;
	ld4	r22=[in0],8
	ld4	r23=[r15],8
	;;
	ld4	r24=[in0]
	add	r20=r20,r21
	add	r22=r22,r23
	;;
	add	r20=r20,r22
	;;
	add	r20=r20,r24
	;;
	// The sum of five 32-bit words is below 2^35, so three 16-bit folds
	// are enough to bring it down to at most 0xffff.
	shr.u	ret0=r20,16	// now need to add the carry
	zxt2	r20=r20
	;;
	add	r20=ret0,r20
	;;
	shr.u	ret0=r20,16	// add carry again
	zxt2	r20=r20
	;;
	add	r20=ret0,r20
	;;
	shr.u	ret0=r20,16
	zxt2	r20=r20
	;;
	add	r20=ret0,r20
	mov	r9=0xffff
	;;
	andcm	ret0=r9,r20	// ret0 = 0xffff & ~sum
	.restore sp		// reset frame state
	br.ret.sptk.many b0
	;;

.generic:
	.prologue
	.save ar.pfs, r35
	alloc	r35=ar.pfs,2,2,2,0	// 2 inputs, 2 locals (r34, r35), 2 outputs
	.save rp, r34
	mov	r34=b0			// keep the return address across the call
	.body
	dep.z	out1=in1,2,30		// out1 = low 30 bits of in1, shifted left by 2
	mov	out0=in0
	;;
	br.call.sptk.many b0=do_csum
	;;
	andcm	ret0=-1,ret0		// ret0 = ~ret0
	mov	ar.pfs=r35
	mov	b0=r34
	br.ret.sptk.many b0
END(ip_fast_csum)
EXPORT_SYMBOL(ip_fast_csum)

/*
 * csum_ipv6_magic(in0, in1, in2, in3, in4)
 *
 * Inputs, as used by the code below:
 *	in0, in1: addresses from which four consecutive 32-bit words each
 *		  are loaded (16 bytes per pointer)
 *	in2:	  32-bit value (upper 32 bits of the register are ignored)
 *	in3:	  only the low 16 bits are used
 *	in4:	  32-bit value (upper 32 bits of the register are ignored)
 *
 * NOTE(review): these presumably correspond to the IPv6 source address,
 * destination address, payload length, next-header protocol and initial
 * checksum -- confirm against the C prototype.
 *
 * Return (r8): the eight loaded words, in4, and the byte-reversed in2/in3
 * values summed in 64 bits, folded to 16 bits, complemented and masked to
 * 16 bits.
 *
 * Scratch registers written: r8-r11, r15-r27; in2 and in4 are zero-extended
 * in place.
 */
GLOBAL_ENTRY(csum_ipv6_magic)
	ld4	r20=[in0],4
	ld4	r21=[in1],4
	zxt4	in2=in2
	;;
	ld4	r22=[in0],4
	ld4	r23=[in1],4
	dep	r15=in3,in2,32,16	// r15 = in2 with in3[15:0] placed at bits 32..47
	;;
	ld4	r24=[in0],4
	ld4	r25=[in1],4
	mux1	r15=r15,@rev		// reverse all eight bytes of r15
	add	r16=r20,r21
	add	r17=r22,r23
	zxt4	in4=in4
	;;
	ld4	r26=[in0],4
	ld4	r27=[in1],4
	shr.u	r15=r15,16		// r15 = bswap32(in2) << 16 | bswap16(in3)
	add	r18=r24,r25
	add	r8=r16,r17
	;;
	add	r19=r26,r27
	add	r8=r8,r18
	;;
	add	r8=r8,r19
	add	r15=r15,in4
	;;
	add	r8=r8,r15
	;;
	/*
	 * r15 can occupy up to 48 bits, so the total can reach 2^48 and
	 * above.  After the 64->32 fold the value may need 33 bits, after
	 * the next fold 17 bits, and after the one following it can still be
	 * exactly 0x10000.  A further 16-bit fold is therefore required
	 * before masking; without it the final andcm would silently drop
	 * that carry.  The extra fold leaves any value <= 0xffff unchanged.
	 */
	shr.u	r10=r8,32	// now fold sum into short
	zxt4	r11=r8
	;;
	add	r8=r10,r11
	;;
	shr.u	r10=r8,16	// yeah, keep it rolling
	zxt2	r11=r8
	;;
	add	r8=r10,r11
	;;
	shr.u	r10=r8,16	// three times lucky
	zxt2	r11=r8
	;;
	add	r8=r10,r11
	;;
	shr.u	r10=r8,16	// absorb a possible last carry (0x10000 -> 1)
	zxt2	r11=r8
	;;
	add	r8=r10,r11
	mov	r9=0xffff
	;;
	andcm	r8=r9,r8	// r8 = 0xffff & ~sum
	br.ret.sptk.many b0
END(csum_ipv6_magic)
EXPORT_SYMBOL(csum_ipv6_magic)