// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2019-2020 Arm Ltd.

#include <linux/compiler.h>
#include <linux/kasan-checks.h>
#include <linux/kernel.h>

#include <net/checksum.h>

/* Looks dumb, but generates nice-ish code */
static u64 accumulate(u64 sum, u64 data)
{
	__uint128_t tmp = (__uint128_t)sum + data;
	return tmp + (tmp >> 64);
}

/*
 * We over-read the buffer and this makes KASAN unhappy. Instead, disable
 * instrumentation and call kasan explicitly.
 */
unsigned int __no_sanitize_address do_csum(const unsigned char *buff, int len)
{
	unsigned int offset, shift, sum;
	const u64 *ptr;
	u64 data, sum64 = 0;

	if (unlikely(len <= 0))
		return 0;

	offset = (unsigned long)buff & 7;
	/*
	 * This is to all intents and purposes safe, since rounding down cannot
	 * result in a different page or cache line being accessed, and @buff
	 * should absolutely not be pointing to anything read-sensitive. We do,
	 * however, have to be careful not to piss off KASAN, which means using
	 * unchecked reads to accommodate the head and tail, for which we'll
	 * compensate with an explicit check up-front.
	 */
	kasan_check_read(buff, len);
	ptr = (u64 *)(buff - offset);
	len = len + offset - 8;

	/*
	 * Head: zero out any excess leading bytes. Shifting back by the same
	 * amount should be at least as fast as any other way of handling the
	 * odd/even alignment, and means we can ignore it until the very end.
	 */
	shift = offset * 8;
	data = *ptr++;
#ifdef __LITTLE_ENDIAN
	data = (data >> shift) << shift;
#else
	data = (data << shift) >> shift;
#endif

	/*
	 * Body: straightforward aligned loads from here on (the paired loads
	 * underlying the quadword type still only need dword alignment). The
	 * main loop strictly excludes the tail, so the second loop will always
	 * run at least once.
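 *
 * Each 16-byte chunk below is folded 128->64 with an end-around carry:
 * tmp += (tmp >> 64) | (tmp << 64) leaves the ones'-complement sum of
 * the two halves in the upper 64 bits. The four partial sums are then
 * merged pairwise the same way, so the carry flag never has to be
 * spilled into a general-purpose register.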
 */
	while (unlikely(len > 64)) {
		__uint128_t tmp1, tmp2, tmp3, tmp4;

		tmp1 = *(__uint128_t *)ptr;
		tmp2 = *(__uint128_t *)(ptr + 2);
		tmp3 = *(__uint128_t *)(ptr + 4);
		tmp4 = *(__uint128_t *)(ptr + 6);

		len -= 64;
		ptr += 8;

		/* This is the "don't dump the carry flag into a GPR" idiom */
		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
		tmp2 += (tmp2 >> 64) | (tmp2 << 64);
		tmp3 += (tmp3 >> 64) | (tmp3 << 64);
		tmp4 += (tmp4 >> 64) | (tmp4 << 64);
		tmp1 = ((tmp1 >> 64) << 64) | (tmp2 >> 64);
		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
		tmp3 = ((tmp3 >> 64) << 64) | (tmp4 >> 64);
		tmp3 += (tmp3 >> 64) | (tmp3 << 64);
		tmp1 = ((tmp1 >> 64) << 64) | (tmp3 >> 64);
		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
		tmp1 = ((tmp1 >> 64) << 64) | sum64;
		tmp1 += (tmp1 >> 64) | (tmp1 << 64);
		sum64 = tmp1 >> 64;
	}
	while (len > 8) {
		__uint128_t tmp;

		sum64 = accumulate(sum64, data);
		tmp = *(__uint128_t *)ptr;

		len -= 16;
		ptr += 2;

#ifdef __LITTLE_ENDIAN
		data = tmp >> 64;
		sum64 = accumulate(sum64, tmp);
#else
		data = tmp;
		sum64 = accumulate(sum64, tmp >> 64);
#endif
	}
	if (len > 0) {
		sum64 = accumulate(sum64, data);
		data = *ptr;
		len -= 8;
	}
	/*
	 * Tail: zero any over-read bytes similarly to the head, again
	 * preserving odd/even alignment.
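 *
 * At this point len is in [-7, 0]: only len + 8 bytes of @data are
 * valid, so -8 * len gives the number of over-read bits to mask off.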
 */
	shift = len * -8;
#ifdef __LITTLE_ENDIAN
	data = (data << shift) >> shift;
#else
	data = (data >> shift) << shift;
#endif
	sum64 = accumulate(sum64, data);

	/* Finally, folding */
	sum64 += (sum64 >> 32) | (sum64 << 32);
	sum = sum64 >> 32;
	sum += (sum >> 16) | (sum << 16);
	if (offset & 1)
		return (u16)swab32(sum);

	return sum >> 16;
}

__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
			const struct in6_addr *daddr,
			__u32 len, __u8 proto, __wsum csum)
{
	__uint128_t src, dst;
	u64 sum = (__force u64)csum;

	src = *(const __uint128_t *)saddr->s6_addr;
	dst = *(const __uint128_t *)daddr->s6_addr;

	sum += (__force u32)htonl(len);
#ifdef __LITTLE_ENDIAN
	sum += (u32)proto << 24;
#else
	sum += proto;
#endif
	src += (src >> 64) | (src << 64);
	dst += (dst >> 64) | (dst << 64);

	sum = accumulate(sum, src >> 64);
	sum = accumulate(sum, dst >> 64);

	sum += ((sum >> 32) | (sum << 32));
	return csum_fold((__force __wsum)(sum >> 32));
}
EXPORT_SYMBOL(csum_ipv6_magic);
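
/*
 * A minimal userspace sketch of the end-around-carry fold used by
 * accumulate() above, for illustration only: it is not part of the
 * kernel build, and fold64() plus the test value are hypothetical.
 *
 *	#include <stdint.h>
 *	#include <stdio.h>
 *
 *	static uint64_t fold64(uint64_t sum, uint64_t data)
 *	{
 *		unsigned __int128 tmp = (unsigned __int128)sum + data;
 *
 *		// Add the carry-out back into the low 64 bits: this is
 *		// ones'-complement addition, as Internet checksums use.
 *		return (uint64_t)(tmp + (tmp >> 64));
 *	}
 *
 *	int main(void)
 *	{
 *		// 0xffff...ffff + 2 overflows a u64; folding the carry
 *		// back in yields 2, matching ones'-complement arithmetic.
 *		printf("%llx\n", (unsigned long long)fold64(~0ULL, 2));
 *		return 0;
 *	}
 */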