/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Optimized version of the ip_fast_csum() function
 * Used for calculating IP header checksum
 *
 * Return: 16bit checksum, complemented
 *
 * Inputs:
 *	in0: address of buffer to checksum (char *)
 *	in1: length of the buffer in 32-bit words (int); the fast path below
 *	     is taken only when this is 5, i.e. a 20-byte header
 *
 * Copyright (C) 2002, 2006 Intel Corp.
 * Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com>
 */

#include <linux/export.h>
#include <asm/asmmacro.h>

/*
 * Since we know that most likely this function is called with buf aligned
 * on 4-byte boundary and 20 bytes in length, we can execute rather quickly
 * versus calling generic version of do_csum, which has lots of overhead in
 * handling various alignments and sizes.  However, due to lack of constraints
 * put on the function input argument, cases with alignment not on 4-byte or
 * size not equal to 20 bytes will be handled by the generic do_csum function.
 */

#define in0	r32
#define in1	r33
#define in2	r34
#define in3	r35
#define in4	r36
#define ret0	r8

/*
 * ip_fast_csum(in0 = buffer, in1 = length in 32-bit words)
 *
 * Fast path (in1 == 5 and in0 4-byte aligned): the five 32-bit words are
 * loaded through two interleaved pointers (in0 at offsets 0/8/16, r15 at
 * offsets 4/12), summed in r20, folded to 16 bits and complemented.
 * Scratch registers used: r9, r14, r15, r20-r24, p6, p7.
 *
 * Slow path (.generic): calls do_csum(in0, in1 * 4) and returns the
 * bitwise complement of its result.
 */
GLOBAL_ENTRY(ip_fast_csum)
	.prologue
	.body
	cmp.ne	p6,p7=5,in1	// size other than 20 byte?
	and	r14=3,in0	// is it aligned on 4-byte?
	add	r15=4,in0	// second source pointer
	;;
	// p6 = (in1 != 5) || misaligned; p7 = (in1 == 5) && aligned
	cmp.ne.or.andcm p6,p7=r14,r0
	;;
(p7)	ld4	r20=[in0],8	// word 0, in0 -> word 2
(p7)	ld4	r21=[r15],8	// word 1, r15 -> word 3
(p6)	br.spnt	.generic
	;;
	ld4	r22=[in0],8	// word 2, in0 -> word 4
	ld4	r23=[r15],8	// word 3
	;;
	ld4	r24=[in0]	// word 4
	add	r20=r20,r21
	add	r22=r22,r23
	;;
	add	r20=r20,r22
	;;
	add	r20=r20,r24	// r20 = sum of all five words
	;;
	shr.u	ret0=r20,16	// now need to add the carry
	zxt2	r20=r20
	;;
	add	r20=ret0,r20
	;;
	shr.u	ret0=r20,16	// add carry again
	zxt2	r20=r20
	;;
	add	r20=ret0,r20
	;;
	shr.u	ret0=r20,16	// third and last fold
	zxt2	r20=r20
	;;
	add	r20=ret0,r20
	mov	r9=0xffff
	;;
	andcm	ret0=r9,r20	// ret0 = 0xffff & ~r20
	.restore sp		// reset frame state
	br.ret.sptk.many b0
	;;

.generic:
	.prologue
	.save ar.pfs, r35
	alloc	r35=ar.pfs,2,2,2,0	// 2 inputs, 2 locals (r34, r35), 2 outputs
	.save rp, r34
	mov	r34=b0			// keep return address across the call
	.body
	dep.z	out1=in1,2,30		// out1 = (in1 & 0x3fffffff) << 2: words -> bytes
	mov	out0=in0
	;;
	br.call.sptk.many b0=do_csum
	;;
	andcm	ret0=-1,ret0		// ret0 = ~ret0
	mov	ar.pfs=r35
	mov	b0=r34
	br.ret.sptk.many b0
END(ip_fast_csum)
EXPORT_SYMBOL(ip_fast_csum)

/*
 * csum_ipv6_magic(in0, in1, in2, in3, in4)
 *
 * What the code does with its inputs:
 *	in0, in1: each is read as four consecutive 32-bit words (16 bytes)
 *	in2:      zero-extended to 32 bits, then byte-swapped
 *	in3:      low 16 bits taken, then byte-swapped
 *	in4:      zero-extended to 32 bits and added as is
 * NOTE(review): these presumably correspond to saddr, daddr, len, proto and
 * the initial csum of the C prototype -- confirm against the header.
 *
 * Return (r8): the 64-bit sum of all of the above, folded to 16 bits and
 * complemented (0xffff & ~sum).
 * Scratch registers used: r9-r11, r15-r27.
 */
GLOBAL_ENTRY(csum_ipv6_magic)
	ld4	r20=[in0],4
	ld4	r21=[in1],4
	zxt4	in2=in2
	;;
	ld4	r22=[in0],4
	ld4	r23=[in1],4
	dep	r15=in3,in2,32,16	// r15 = in2 with in3[15:0] placed at bits 32..47
	;;
	ld4	r24=[in0],4
	ld4	r25=[in1],4
	mux1	r15=r15,@rev		// reverse all eight bytes of r15
	add	r16=r20,r21
	add	r17=r22,r23
	zxt4	in4=in4
	;;
	ld4	r26=[in0],4
	ld4	r27=[in1],4
	shr.u	r15=r15,16		// r15 = bswap32(in2) << 16 | bswap16(in3)
	add	r18=r24,r25
	add	r8=r16,r17
	;;
	add	r19=r26,r27
	add	r8=r8,r18
	;;
	add	r8=r8,r19
	add	r15=r15,in4
	;;
	add	r8=r8,r15
	;;
	shr.u	r10=r8,32	// now fold sum into short
	zxt4	r11=r8
	;;
	add	r8=r10,r11
	;;
	shr.u	r10=r8,16	// yeah, keep it rolling
	zxt2	r11=r8
	;;
	add	r8=r10,r11
	;;
	shr.u	r10=r8,16	// three times lucky
	zxt2	r11=r8
	;;
	add	r8=r10,r11
	mov	r9=0xffff
	;;
	andcm	r8=r9,r8	// r8 = 0xffff & ~r8
	br.ret.sptk.many b0
END(csum_ipv6_magic)
EXPORT_SYMBOL(csum_ipv6_magic)