/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Optimized version of the ip_fast_csum() function
 * Used for calculating IP header checksum
 *
 * Return: 16bit checksum, complemented
 *
 * Inputs:
 *      in0: address of buffer to checksum (char *)
 *      in1: length of the buffer in 32-bit words (int)
 *
 * Copyright (C) 2002, 2006 Intel Corp.
 * Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com>
 */

#include <linux/export.h>
#include <asm/asmmacro.h>

/*
 * Since we know that most likely this function is called with buf aligned
 * on 4-byte boundary and 20 bytes in length, we can execute rather quickly
 * versus calling the generic version of do_csum, which has lots of overhead
 * in handling various alignments and sizes.  However, due to the lack of
 * constraints put on the function input arguments, cases with alignment not
 * on 4-byte or size not equal to 20 bytes will be handled by the generic
 * do_csum function.
 */

/*
 * Names for the stacked argument registers r32-r36 and for r8, which
 * carries the return value.  The routines in this file refer to their
 * arguments through these names.  The lines are kept free of trailing
 * comments so that each macro expands to the bare register name.
 */
#define in0	r32
#define in1	r33
#define in2	r34
#define in3	r35
#define in4	r36
#define ret0	r8

/*
 * ip_fast_csum(in0 = buffer address, in1 = buffer length in 32-bit words)
 *
 * Returns in r8 the complemented 16-bit sum (with end-around carry) of
 * the buffer.
 *
 * Fast path, taken when in1 == 5 and in0 is 4-byte aligned: a leaf
 * sequence that reads the five words through two interleaved pointers,
 * adds them in a 64-bit register and folds the total down to 16 bits.
 * Five 32-bit words add up to less than 2^35, so three 16-bit fold rounds
 * are enough to leave a value that fits in 16 bits.
 *
 * Every other input branches to .generic, which calls do_csum() with the
 * length converted to bytes and complements the value it returns.
 *
 * Registers written: r8, r9, r14, r15, r20-r24, p6, p7.  The fast path
 * also post-increments in0.  The .generic path additionally uses r34/r35
 * as locals and is subject to whatever do_csum() itself clobbers.
 *
 * NOTE(review): the length test is a 64-bit compare.  If a caller leaves
 * the upper 32 bits of in1 non-zero the request falls through to .generic,
 * which only looks at the low 30 bits of in1.
 */
GLOBAL_ENTRY(ip_fast_csum)
	.prologue
	.body
	cmp.ne	p6,p7=5,in1	// p6: size other than 20 bytes, p7: exactly 5 words
	and	r14=3,in0	// low two address bits, zero when 4-byte aligned
	add	r15=4,in0	// second source pointer, one word ahead of in0
	;;
	cmp.ne.or.andcm p6,p7=r14,r0	// misaligned: turn p6 on and p7 off
	;;
(p7)	ld4	r20=[in0],8	// word 0, in0 steps to word 2
(p7)	ld4	r21=[r15],8	// word 1, r15 steps to word 3
(p6)	br.spnt	.generic	// unusual size or alignment: let do_csum handle it
	;;
	ld4	r22=[in0],8	// word 2, in0 steps to word 4
	ld4	r23=[r15],8	// word 3
	;;
	ld4	r24=[in0]	// word 4
	add	r20=r20,r21
	add	r22=r22,r23
	;;
	add	r20=r20,r22
	;;
	add	r20=r20,r24	// r20 = 64-bit sum of all five words
	;;
	shr.u	ret0=r20,16	// now need to add the carry
	zxt2	r20=r20
	;;
	add	r20=ret0,r20
	;;
	shr.u	ret0=r20,16	// add carry again
	zxt2	r20=r20
	;;
	add	r20=ret0,r20
	;;
	shr.u	ret0=r20,16	// last round, result now fits in 16 bits
	zxt2	r20=r20
	;;
	add	r20=ret0,r20
	mov	r9=0xffff
	;;
	andcm	ret0=r9,r20	// ret0 = 0xffff & ~r20, the complemented sum
	.restore sp		// reset frame state
	br.ret.sptk.many b0
	;;

.generic:
	.prologue
	.save ar.pfs, r35
	alloc	r35=ar.pfs,2,2,2,0	// 2 inputs, 2 locals (r34, r35), 2 outputs
	.save rp, r34
	mov	r34=b0		// keep the return address across the call
	.body
	dep.z	out1=in1,2,30	// out1 = low 30 bits of in1 times 4: length in bytes
	mov	out0=in0	// in0 is unmodified here, the (p7) loads were skipped
	;;
	br.call.sptk.many b0=do_csum
	;;
	andcm	ret0=-1,ret0	// complement the value returned by do_csum
	mov	ar.pfs=r35
	mov	b0=r34
	br.ret.sptk.many b0
END(ip_fast_csum)
EXPORT_SYMBOL(ip_fast_csum)

/*
 * csum_ipv6_magic(in0, in1, in2, in3, in4)
 *
 * Adds up four 32-bit words read from in0, four 32-bit words read from in1,
 * the byte-swapped low 32 bits of in2, the byte-swapped low 16 bits of in3
 * and the low 32 bits of in4, folds the total to 16 bits with end-around
 * carry and returns the complement of that value in r8.
 *
 * NOTE(review): going by the name this is the IPv6 pseudo-header checksum,
 * so the arguments are presumably source address, destination address,
 * length, next-header value and initial checksum - confirm against the
 * C prototype.
 *
 * The 64-bit total can slightly exceed 2^48 because the swapped in2 value
 * is added at bit 16.  After the 32-bit fold and two 16-bit folds the
 * value can therefore still be 0x10000 (for example in2 = 0xffff with
 * address words summing to 0x1ffffffff), so a further 16-bit fold is
 * performed before complementing.  That extra round leaves any value that
 * already fits in 16 bits unchanged.
 *
 * Leaf routine: no alloc and no memory stack use.  Writes r8-r11, r15-r27,
 * in2 and in4, and post-increments in0 and in1.
 */
GLOBAL_ENTRY(csum_ipv6_magic)
	ld4	r20=[in0],4	// word 0 from in0
	ld4	r21=[in1],4	// word 0 from in1
	zxt4	in2=in2		// keep only the low 32 bits of in2
	;;
	ld4	r22=[in0],4	// word 1 from in0
	ld4	r23=[in1],4	// word 1 from in1
	dep	r15=in3,in2,32,16	// r15 = in2 with in3 bits 15:0 placed at bits 47:32
	;;
	ld4	r24=[in0],4	// word 2 from in0
	ld4	r25=[in1],4	// word 2 from in1
	mux1	r15=r15,@rev	// reverse the order of all eight bytes
	add	r16=r20,r21
	add	r17=r22,r23
	zxt4	in4=in4		// keep only the low 32 bits of in4
	;;
	ld4	r26=[in0],4	// word 3 from in0
	ld4	r27=[in1],4	// word 3 from in1
	shr.u	r15=r15,16	// swapped in3 now in bits 15:0, swapped in2 in bits 47:16
	add	r18=r24,r25
	add	r8=r16,r17
	;;
	add	r19=r26,r27
	add	r8=r8,r18
	;;
	add	r8=r8,r19	// r8 = sum of the eight loaded words
	add	r15=r15,in4
	;;
	add	r8=r8,r15	// r8 = complete 64-bit total
	;;
	shr.u	r10=r8,32	// now fold sum into short
	zxt4	r11=r8
	;;
	add	r8=r10,r11
	;;
	shr.u	r10=r8,16	// yeah, keep it rolling
	zxt2	r11=r8
	;;
	add	r8=r10,r11
	;;
	shr.u	r10=r8,16	// three times lucky
	zxt2	r11=r8
	;;
	add	r8=r10,r11	// can still be 0x10000 at this point
	;;
	shr.u	r10=r8,16	// final round absorbs that last carry
	zxt2	r11=r8
	;;
	add	r8=r10,r11	// r8 now fits in 16 bits
	mov	r9=0xffff
	;;
	andcm	r8=r9,r8	// r8 = 0xffff & ~r8, the complemented sum
	br.ret.sptk.many b0
END(csum_ipv6_magic)
EXPORT_SYMBOL(csum_ipv6_magic)