18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Optimized version of the ip_fast_csum() function
48c2ecf20Sopenharmony_ci * Used for calculating IP header checksum
58c2ecf20Sopenharmony_ci *
68c2ecf20Sopenharmony_ci * Return: 16bit checksum, complemented
78c2ecf20Sopenharmony_ci *
88c2ecf20Sopenharmony_ci * Inputs:
98c2ecf20Sopenharmony_ci *      in0: address of buffer to checksum (char *)
108c2ecf20Sopenharmony_ci *      in1: length of the buffer in 32-bit words (int)
118c2ecf20Sopenharmony_ci *
128c2ecf20Sopenharmony_ci * Copyright (C) 2002, 2006 Intel Corp.
138c2ecf20Sopenharmony_ci * Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com>
148c2ecf20Sopenharmony_ci */
158c2ecf20Sopenharmony_ci
168c2ecf20Sopenharmony_ci#include <asm/asmmacro.h>
178c2ecf20Sopenharmony_ci#include <asm/export.h>
188c2ecf20Sopenharmony_ci
198c2ecf20Sopenharmony_ci/*
208c2ecf20Sopenharmony_ci * Since we know that most likely this function is called with buf aligned
218c2ecf20Sopenharmony_ci * on a 4-byte boundary and 20 bytes in length, we can execute rather quickly
228c2ecf20Sopenharmony_ci * versus calling the generic do_csum, which has lots of overhead in
238c2ecf20Sopenharmony_ci * handling various alignments and sizes.  However, due to the lack of constraints
248c2ecf20Sopenharmony_ci * put on the function input arguments, cases with alignment not on 4-byte or
258c2ecf20Sopenharmony_ci * size not equal to 20 bytes will be handled by the generic do_csum function.
268c2ecf20Sopenharmony_ci */
278c2ecf20Sopenharmony_ci
/*
 * Register aliases used by the routines in this file.
 * in0 to in4 name r32 to r36, which the code below reads as its incoming
 * arguments.  ret0 names r8, which the code below loads with the result
 * before returning.
 */
288c2ecf20Sopenharmony_ci#define in0	r32
298c2ecf20Sopenharmony_ci#define in1	r33
308c2ecf20Sopenharmony_ci#define in2	r34
318c2ecf20Sopenharmony_ci#define in3	r35
328c2ecf20Sopenharmony_ci#define in4	r36
338c2ecf20Sopenharmony_ci#define ret0	r8
348c2ecf20Sopenharmony_ci
/*
 * ip_fast_csum
 *
 * Inputs:
 *	in0: address of the data to checksum
 *	in1: length of the data in 32-bit words
 * Output:
 *	ret0 (r8): complemented checksum
 *
 * Fast path (in1 equal to 5 and in0 4-byte aligned): the five 32-bit words
 * are loaded through two interleaved pointers, added up in a 64-bit
 * register, folded to 16 bits in three steps and complemented under a
 * 0xffff mask.
 *
 * Slow path (.generic, everything else): a register frame is allocated,
 * do_csum is called with the address and the length converted to bytes,
 * and all 64 bits of its result are complemented.
 */
358c2ecf20Sopenharmony_ciGLOBAL_ENTRY(ip_fast_csum)
368c2ecf20Sopenharmony_ci	.prologue
378c2ecf20Sopenharmony_ci	.body
388c2ecf20Sopenharmony_ci	cmp.ne	p6,p7=5,in1	// p6 = (in1 != 5), p7 = (in1 == 5), five words = 20 bytes
	// NOTE(review): the compare above is 64 bits wide, so any nonzero
	// upper half of in1 also selects the generic path.
398c2ecf20Sopenharmony_ci	and	r14=3,in0	// r14 = low two address bits, nonzero when not 4-byte aligned
408c2ecf20Sopenharmony_ci	add	r15=4,in0	// second source pointer, one word past in0
418c2ecf20Sopenharmony_ci	;;
	// When r14 is nonzero this sets p6 and clears p7, otherwise both are
	// left alone.  Afterwards p7 means fast path and p6 means generic path.
428c2ecf20Sopenharmony_ci	cmp.ne.or.andcm p6,p7=r14,r0
438c2ecf20Sopenharmony_ci	;;
448c2ecf20Sopenharmony_ci(p7)	ld4	r20=[in0],8	// word 0, then in0 += 8
458c2ecf20Sopenharmony_ci(p7)	ld4	r21=[r15],8	// word 1, then r15 += 8
468c2ecf20Sopenharmony_ci(p6)	br.spnt	.generic	// other size or alignment, hinted as not taken
478c2ecf20Sopenharmony_ci	;;
488c2ecf20Sopenharmony_ci	ld4	r22=[in0],8	// word 2, then in0 += 8
498c2ecf20Sopenharmony_ci	ld4	r23=[r15],8	// word 3
508c2ecf20Sopenharmony_ci	;;
518c2ecf20Sopenharmony_ci	ld4	r24=[in0]	// word 4
528c2ecf20Sopenharmony_ci	add	r20=r20,r21	// word 0 + word 1
538c2ecf20Sopenharmony_ci	add	r22=r22,r23	// word 2 + word 3
548c2ecf20Sopenharmony_ci	;;
558c2ecf20Sopenharmony_ci	add	r20=r20,r22
568c2ecf20Sopenharmony_ci	;;
578c2ecf20Sopenharmony_ci	add	r20=r20,r24	// r20 = 64-bit sum of all five words
588c2ecf20Sopenharmony_ci	;;
	// First fold: add everything above bit 15 back into the low 16 bits.
598c2ecf20Sopenharmony_ci	shr.u	ret0=r20,16	// now need to add the carry
608c2ecf20Sopenharmony_ci	zxt2	r20=r20
618c2ecf20Sopenharmony_ci	;;
628c2ecf20Sopenharmony_ci	add	r20=ret0,r20
638c2ecf20Sopenharmony_ci	;;
	// Second fold: the previous add can again exceed 16 bits.
648c2ecf20Sopenharmony_ci	shr.u	ret0=r20,16	// add carry again
658c2ecf20Sopenharmony_ci	zxt2	r20=r20
668c2ecf20Sopenharmony_ci	;;
678c2ecf20Sopenharmony_ci	add	r20=ret0,r20
688c2ecf20Sopenharmony_ci	;;
	// Third fold: absorbs the last possible carry out of bit 15.
698c2ecf20Sopenharmony_ci	shr.u	ret0=r20,16
708c2ecf20Sopenharmony_ci	zxt2	r20=r20
718c2ecf20Sopenharmony_ci	;;
728c2ecf20Sopenharmony_ci	add	r20=ret0,r20
738c2ecf20Sopenharmony_ci	mov	r9=0xffff	// mask for the final complement
748c2ecf20Sopenharmony_ci	;;
758c2ecf20Sopenharmony_ci	andcm	ret0=r9,r20	// ret0 = 0xffff AND NOT r20
768c2ecf20Sopenharmony_ci	.restore sp		// reset frame state
778c2ecf20Sopenharmony_ci	br.ret.sptk.many b0
788c2ecf20Sopenharmony_ci	;;
798c2ecf20Sopenharmony_ci
	// Slow path: hand the work to do_csum.
808c2ecf20Sopenharmony_ci.generic:
818c2ecf20Sopenharmony_ci	.prologue
828c2ecf20Sopenharmony_ci	.save ar.pfs, r35
838c2ecf20Sopenharmony_ci	alloc	r35=ar.pfs,2,2,2,0	// 2 inputs, 2 locals, 2 outputs, 0 rotating, old ar.pfs kept in r35
848c2ecf20Sopenharmony_ci	.save rp, r34
858c2ecf20Sopenharmony_ci	mov	r34=b0	// keep the return address across the call
868c2ecf20Sopenharmony_ci	.body
878c2ecf20Sopenharmony_ci	dep.z	out1=in1,2,30	// out1 = low 30 bits of in1 shifted left by 2, words to bytes
888c2ecf20Sopenharmony_ci	mov	out0=in0	// out0 = data address
898c2ecf20Sopenharmony_ci	;;
908c2ecf20Sopenharmony_ci	br.call.sptk.many b0=do_csum
918c2ecf20Sopenharmony_ci	;;
928c2ecf20Sopenharmony_ci	andcm	ret0=-1,ret0	// complement all 64 bits of the do_csum result
938c2ecf20Sopenharmony_ci	mov	ar.pfs=r35	// restore the caller frame state
948c2ecf20Sopenharmony_ci	mov	b0=r34	// restore the return address
958c2ecf20Sopenharmony_ci	br.ret.sptk.many b0
968c2ecf20Sopenharmony_ciEND(ip_fast_csum)
978c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ip_fast_csum)
988c2ecf20Sopenharmony_ci
/*
 * csum_ipv6_magic
 *
 * Adds together, in a 64-bit register:
 *	- four 32-bit words read from [in0],
 *	- four 32-bit words read from [in1],
 *	- a 48-bit value built from the byte-reversed low 32 bits of in2
 *	  (upper part) and the byte-reversed low 16 bits of in3 (lower part),
 *	- the low 32 bits of in4,
 * then folds the total to 16 bits and returns its complement, masked to
 * 16 bits, in r8.
 *
 * NOTE(review): judging by the name, in0 and in1 presumably point at the
 * two IPv6 addresses, and in2, in3 and in4 are presumably the payload
 * length, the next-header value and a starting checksum.  The C prototype
 * is not visible here, confirm against it.
 */
998c2ecf20Sopenharmony_ciGLOBAL_ENTRY(csum_ipv6_magic)
1008c2ecf20Sopenharmony_ci	ld4	r20=[in0],4	// word 0 from in0, then in0 += 4
1018c2ecf20Sopenharmony_ci	ld4	r21=[in1],4	// word 0 from in1, then in1 += 4
1028c2ecf20Sopenharmony_ci	zxt4	in2=in2	// keep only the low 32 bits of in2
1038c2ecf20Sopenharmony_ci	;;
1048c2ecf20Sopenharmony_ci	ld4	r22=[in0],4	// word 1 from each buffer
1058c2ecf20Sopenharmony_ci	ld4	r23=[in1],4
1068c2ecf20Sopenharmony_ci	dep	r15=in3,in2,32,16	// r15 = in2 with the low 16 bits of in3 placed at bits 32 to 47
1078c2ecf20Sopenharmony_ci	;;
1088c2ecf20Sopenharmony_ci	ld4	r24=[in0],4	// word 2 from each buffer
1098c2ecf20Sopenharmony_ci	ld4	r25=[in1],4
1108c2ecf20Sopenharmony_ci	mux1	r15=r15,@rev	// reverse the order of the 8 bytes of r15
1118c2ecf20Sopenharmony_ci	add	r16=r20,r21	// partial sum of the word 0 pair
1128c2ecf20Sopenharmony_ci	add	r17=r22,r23	// partial sum of the word 1 pair
1138c2ecf20Sopenharmony_ci	zxt4	in4=in4	// keep only the low 32 bits of in4
1148c2ecf20Sopenharmony_ci	;;
1158c2ecf20Sopenharmony_ci	ld4	r26=[in0],4	// word 3 from each buffer
1168c2ecf20Sopenharmony_ci	ld4	r27=[in1],4
1178c2ecf20Sopenharmony_ci	shr.u	r15=r15,16	// drop the two zero bytes the reversal moved to the bottom
1188c2ecf20Sopenharmony_ci	add	r18=r24,r25	// partial sum of the word 2 pair
1198c2ecf20Sopenharmony_ci	add	r8=r16,r17
1208c2ecf20Sopenharmony_ci	;;
1218c2ecf20Sopenharmony_ci	add	r19=r26,r27	// partial sum of the word 3 pair
1228c2ecf20Sopenharmony_ci	add	r8=r8,r18
1238c2ecf20Sopenharmony_ci	;;
1248c2ecf20Sopenharmony_ci	add	r8=r8,r19	// r8 = sum of all eight loaded words
1258c2ecf20Sopenharmony_ci	add	r15=r15,in4	// add in4 to the byte-reversed in2 and in3 value
1268c2ecf20Sopenharmony_ci	;;
1278c2ecf20Sopenharmony_ci	add	r8=r8,r15	// r8 = complete 64-bit sum
1288c2ecf20Sopenharmony_ci	;;
	// Fold 64 bits to 32: add the upper half to the lower half.
1298c2ecf20Sopenharmony_ci	shr.u	r10=r8,32	// now fold sum into short
1308c2ecf20Sopenharmony_ci	zxt4	r11=r8
1318c2ecf20Sopenharmony_ci	;;
1328c2ecf20Sopenharmony_ci	add	r8=r10,r11
1338c2ecf20Sopenharmony_ci	;;
	// Fold towards 16 bits: add everything above bit 15 to the low 16 bits.
1348c2ecf20Sopenharmony_ci	shr.u	r10=r8,16	// yeah, keep it rolling
1358c2ecf20Sopenharmony_ci	zxt2	r11=r8
1368c2ecf20Sopenharmony_ci	;;
1378c2ecf20Sopenharmony_ci	add	r8=r10,r11
1388c2ecf20Sopenharmony_ci	;;
	// Same fold once more to absorb the carry left by the previous add.
1398c2ecf20Sopenharmony_ci	shr.u	r10=r8,16	// three times lucky
1408c2ecf20Sopenharmony_ci	zxt2	r11=r8
1418c2ecf20Sopenharmony_ci	;;
1428c2ecf20Sopenharmony_ci	add	r8=r10,r11
1438c2ecf20Sopenharmony_ci	mov	r9=0xffff	// mask for the final complement
1448c2ecf20Sopenharmony_ci	;;
1458c2ecf20Sopenharmony_ci	andcm	r8=r9,r8	// r8 = 0xffff AND NOT r8
1468c2ecf20Sopenharmony_ci	br.ret.sptk.many b0
1478c2ecf20Sopenharmony_ciEND(csum_ipv6_magic)
1488c2ecf20Sopenharmony_ciEXPORT_SYMBOL(csum_ipv6_magic)
149