162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Checksum functions for Hexagon 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci/* This was derived from arch/alpha/lib/checksum.c */ 962306a36Sopenharmony_ci 1062306a36Sopenharmony_ci 1162306a36Sopenharmony_ci#include <linux/module.h> 1262306a36Sopenharmony_ci#include <linux/string.h> 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci#include <asm/byteorder.h> 1562306a36Sopenharmony_ci#include <net/checksum.h> 1662306a36Sopenharmony_ci#include <linux/uaccess.h> 1762306a36Sopenharmony_ci#include <asm/intrinsics.h> 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ci/* Vector value operations */ 2162306a36Sopenharmony_ci#define SIGN(x, y) ((0x8000ULL*x)<<y) 2262306a36Sopenharmony_ci#define CARRY(x, y) ((0x0002ULL*x)<<y) 2362306a36Sopenharmony_ci#define SELECT(x, y) ((0x0001ULL*x)<<y) 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_ci#define VR_NEGATE(a, b, c, d) (SIGN(a, 48) + SIGN(b, 32) + SIGN(c, 16) \ 2662306a36Sopenharmony_ci + SIGN(d, 0)) 2762306a36Sopenharmony_ci#define VR_CARRY(a, b, c, d) (CARRY(a, 48) + CARRY(b, 32) + CARRY(c, 16) \ 2862306a36Sopenharmony_ci + CARRY(d, 0)) 2962306a36Sopenharmony_ci#define VR_SELECT(a, b, c, d) (SELECT(a, 48) + SELECT(b, 32) + SELECT(c, 16) \ 3062306a36Sopenharmony_ci + SELECT(d, 0)) 3162306a36Sopenharmony_ci 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_ci/* optimized HEXAGON V3 intrinsic version */ 3462306a36Sopenharmony_cistatic inline unsigned short from64to16(u64 x) 3562306a36Sopenharmony_ci{ 3662306a36Sopenharmony_ci u64 sum; 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_ci sum = HEXAGON_P_vrmpyh_PP(x^VR_NEGATE(1, 1, 1, 1), 3962306a36Sopenharmony_ci VR_SELECT(1, 1, 1, 1)); 4062306a36Sopenharmony_ci sum += VR_CARRY(0, 0, 1, 0); 4162306a36Sopenharmony_ci sum = HEXAGON_P_vrmpyh_PP(sum, VR_SELECT(0, 0, 1, 1)); 4262306a36Sopenharmony_ci 4362306a36Sopenharmony_ci return 0xFFFF & sum; 4462306a36Sopenharmony_ci} 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ci/* 4762306a36Sopenharmony_ci * computes the checksum of the TCP/UDP pseudo-header 4862306a36Sopenharmony_ci * returns a 16-bit checksum, already complemented. 4962306a36Sopenharmony_ci */ 5062306a36Sopenharmony_ci__sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, 5162306a36Sopenharmony_ci __u32 len, __u8 proto, __wsum sum) 5262306a36Sopenharmony_ci{ 5362306a36Sopenharmony_ci return (__force __sum16)~from64to16( 5462306a36Sopenharmony_ci (__force u64)saddr + (__force u64)daddr + 5562306a36Sopenharmony_ci (__force u64)sum + ((len + proto) << 8)); 5662306a36Sopenharmony_ci} 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_ci__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, 5962306a36Sopenharmony_ci __u32 len, __u8 proto, __wsum sum) 6062306a36Sopenharmony_ci{ 6162306a36Sopenharmony_ci u64 result; 6262306a36Sopenharmony_ci 6362306a36Sopenharmony_ci result = (__force u64)saddr + (__force u64)daddr + 6462306a36Sopenharmony_ci (__force u64)sum + ((len + proto) << 8); 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_ci /* Fold down to 32-bits so we don't lose in the typedef-less 6762306a36Sopenharmony_ci network stack. */ 6862306a36Sopenharmony_ci /* 64 to 33 */ 6962306a36Sopenharmony_ci result = (result & 0xffffffffUL) + (result >> 32); 7062306a36Sopenharmony_ci /* 33 to 32 */ 7162306a36Sopenharmony_ci result = (result & 0xffffffffUL) + (result >> 32); 7262306a36Sopenharmony_ci return (__force __wsum)result; 7362306a36Sopenharmony_ci} 7462306a36Sopenharmony_ciEXPORT_SYMBOL(csum_tcpudp_nofold); 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_ci/* 7762306a36Sopenharmony_ci * Do a 64-bit checksum on an arbitrary memory area.. 7862306a36Sopenharmony_ci * 7962306a36Sopenharmony_ci * This isn't a great routine, but it's not _horrible_ either. The 8062306a36Sopenharmony_ci * inner loop could be unrolled a bit further, and there are better 8162306a36Sopenharmony_ci * ways to do the carry, but this is reasonable. 8262306a36Sopenharmony_ci */ 8362306a36Sopenharmony_ci 8462306a36Sopenharmony_ci/* optimized HEXAGON intrinsic version, with over read fixed */ 8562306a36Sopenharmony_ciunsigned int do_csum(const void *voidptr, int len) 8662306a36Sopenharmony_ci{ 8762306a36Sopenharmony_ci u64 sum0, sum1, x0, x1, *ptr8_o, *ptr8_e, *ptr8; 8862306a36Sopenharmony_ci int i, start, mid, end, mask; 8962306a36Sopenharmony_ci const char *ptr = voidptr; 9062306a36Sopenharmony_ci unsigned short *ptr2; 9162306a36Sopenharmony_ci unsigned int *ptr4; 9262306a36Sopenharmony_ci 9362306a36Sopenharmony_ci if (len <= 0) 9462306a36Sopenharmony_ci return 0; 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_ci start = 0xF & (16-(((int) ptr) & 0xF)) ; 9762306a36Sopenharmony_ci mask = 0x7fffffffUL >> HEXAGON_R_cl0_R(len); 9862306a36Sopenharmony_ci start = start & mask ; 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_ci mid = len - start; 10162306a36Sopenharmony_ci end = mid & 0xF; 10262306a36Sopenharmony_ci mid = mid>>4; 10362306a36Sopenharmony_ci sum0 = mid << 18; 10462306a36Sopenharmony_ci sum1 = 0; 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_ci if (start & 1) 10762306a36Sopenharmony_ci sum0 += (u64) (ptr[0] << 8); 10862306a36Sopenharmony_ci ptr2 = (unsigned short *) &ptr[start & 1]; 10962306a36Sopenharmony_ci if (start & 2) 11062306a36Sopenharmony_ci sum1 += (u64) ptr2[0]; 11162306a36Sopenharmony_ci ptr4 = (unsigned int *) &ptr[start & 3]; 11262306a36Sopenharmony_ci if (start & 4) { 11362306a36Sopenharmony_ci sum0 = HEXAGON_P_vrmpyhacc_PP(sum0, 11462306a36Sopenharmony_ci VR_NEGATE(0, 0, 1, 1)^((u64)ptr4[0]), 11562306a36Sopenharmony_ci VR_SELECT(0, 0, 1, 1)); 11662306a36Sopenharmony_ci sum0 += VR_SELECT(0, 0, 1, 0); 11762306a36Sopenharmony_ci } 11862306a36Sopenharmony_ci ptr8 = (u64 *) &ptr[start & 7]; 11962306a36Sopenharmony_ci if (start & 8) { 12062306a36Sopenharmony_ci sum1 = HEXAGON_P_vrmpyhacc_PP(sum1, 12162306a36Sopenharmony_ci VR_NEGATE(1, 1, 1, 1)^(ptr8[0]), 12262306a36Sopenharmony_ci VR_SELECT(1, 1, 1, 1)); 12362306a36Sopenharmony_ci sum1 += VR_CARRY(0, 0, 1, 0); 12462306a36Sopenharmony_ci } 12562306a36Sopenharmony_ci ptr8_o = (u64 *) (ptr + start); 12662306a36Sopenharmony_ci ptr8_e = (u64 *) (ptr + start + 8); 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci if (mid) { 12962306a36Sopenharmony_ci x0 = *ptr8_e; ptr8_e += 2; 13062306a36Sopenharmony_ci x1 = *ptr8_o; ptr8_o += 2; 13162306a36Sopenharmony_ci if (mid > 1) 13262306a36Sopenharmony_ci for (i = 0; i < mid-1; i++) { 13362306a36Sopenharmony_ci sum0 = HEXAGON_P_vrmpyhacc_PP(sum0, 13462306a36Sopenharmony_ci x0^VR_NEGATE(1, 1, 1, 1), 13562306a36Sopenharmony_ci VR_SELECT(1, 1, 1, 1)); 13662306a36Sopenharmony_ci sum1 = HEXAGON_P_vrmpyhacc_PP(sum1, 13762306a36Sopenharmony_ci x1^VR_NEGATE(1, 1, 1, 1), 13862306a36Sopenharmony_ci VR_SELECT(1, 1, 1, 1)); 13962306a36Sopenharmony_ci x0 = *ptr8_e; ptr8_e += 2; 14062306a36Sopenharmony_ci x1 = *ptr8_o; ptr8_o += 2; 14162306a36Sopenharmony_ci } 14262306a36Sopenharmony_ci sum0 = HEXAGON_P_vrmpyhacc_PP(sum0, x0^VR_NEGATE(1, 1, 1, 1), 14362306a36Sopenharmony_ci VR_SELECT(1, 1, 1, 1)); 14462306a36Sopenharmony_ci sum1 = HEXAGON_P_vrmpyhacc_PP(sum1, x1^VR_NEGATE(1, 1, 1, 1), 14562306a36Sopenharmony_ci VR_SELECT(1, 1, 1, 1)); 14662306a36Sopenharmony_ci } 14762306a36Sopenharmony_ci 14862306a36Sopenharmony_ci ptr4 = (unsigned int *) &ptr[start + (mid * 16) + (end & 8)]; 14962306a36Sopenharmony_ci if (end & 4) { 15062306a36Sopenharmony_ci sum1 = HEXAGON_P_vrmpyhacc_PP(sum1, 15162306a36Sopenharmony_ci VR_NEGATE(0, 0, 1, 1)^((u64)ptr4[0]), 15262306a36Sopenharmony_ci VR_SELECT(0, 0, 1, 1)); 15362306a36Sopenharmony_ci sum1 += VR_SELECT(0, 0, 1, 0); 15462306a36Sopenharmony_ci } 15562306a36Sopenharmony_ci ptr2 = (unsigned short *) &ptr[start + (mid * 16) + (end & 12)]; 15662306a36Sopenharmony_ci if (end & 2) 15762306a36Sopenharmony_ci sum0 += (u64) ptr2[0]; 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_ci if (end & 1) 16062306a36Sopenharmony_ci sum1 += (u64) ptr[start + (mid * 16) + (end & 14)]; 16162306a36Sopenharmony_ci 16262306a36Sopenharmony_ci ptr8 = (u64 *) &ptr[start + (mid * 16)]; 16362306a36Sopenharmony_ci if (end & 8) { 16462306a36Sopenharmony_ci sum0 = HEXAGON_P_vrmpyhacc_PP(sum0, 16562306a36Sopenharmony_ci VR_NEGATE(1, 1, 1, 1)^(ptr8[0]), 16662306a36Sopenharmony_ci VR_SELECT(1, 1, 1, 1)); 16762306a36Sopenharmony_ci sum0 += VR_CARRY(0, 0, 1, 0); 16862306a36Sopenharmony_ci } 16962306a36Sopenharmony_ci sum0 = HEXAGON_P_vrmpyh_PP((sum0+sum1)^VR_NEGATE(0, 0, 0, 1), 17062306a36Sopenharmony_ci VR_SELECT(0, 0, 1, 1)); 17162306a36Sopenharmony_ci sum0 += VR_NEGATE(0, 0, 0, 1); 17262306a36Sopenharmony_ci sum0 = HEXAGON_P_vrmpyh_PP(sum0, VR_SELECT(0, 0, 1, 1)); 17362306a36Sopenharmony_ci 17462306a36Sopenharmony_ci if (start & 1) 17562306a36Sopenharmony_ci sum0 = (sum0 << 8) | (0xFF & (sum0 >> 8)); 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci return 0xFFFF & sum0; 17862306a36Sopenharmony_ci} 179