18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 28c2ecf20Sopenharmony_ci// Copyright (C) 2019-2020 Arm Ltd. 38c2ecf20Sopenharmony_ci 48c2ecf20Sopenharmony_ci#include <linux/compiler.h> 58c2ecf20Sopenharmony_ci#include <linux/kasan-checks.h> 68c2ecf20Sopenharmony_ci#include <linux/kernel.h> 78c2ecf20Sopenharmony_ci 88c2ecf20Sopenharmony_ci#include <net/checksum.h> 98c2ecf20Sopenharmony_ci 108c2ecf20Sopenharmony_cistatic u64 accumulate(u64 sum, u64 data) 118c2ecf20Sopenharmony_ci{ 128c2ecf20Sopenharmony_ci sum += data; 138c2ecf20Sopenharmony_ci if (sum < data) 148c2ecf20Sopenharmony_ci sum += 1; 158c2ecf20Sopenharmony_ci return sum; 168c2ecf20Sopenharmony_ci} 178c2ecf20Sopenharmony_ci 188c2ecf20Sopenharmony_ci/* 198c2ecf20Sopenharmony_ci * We over-read the buffer and this makes KASAN unhappy. Instead, disable 208c2ecf20Sopenharmony_ci * instrumentation and call kasan explicitly. 218c2ecf20Sopenharmony_ci */ 228c2ecf20Sopenharmony_ciunsigned int __no_sanitize_address do_csum(const unsigned char *buff, int len) 238c2ecf20Sopenharmony_ci{ 248c2ecf20Sopenharmony_ci unsigned int offset, shift, sum; 258c2ecf20Sopenharmony_ci const u64 *ptr; 268c2ecf20Sopenharmony_ci u64 data, sum64 = 0; 278c2ecf20Sopenharmony_ci 288c2ecf20Sopenharmony_ci if (unlikely(len == 0)) 298c2ecf20Sopenharmony_ci return 0; 308c2ecf20Sopenharmony_ci 318c2ecf20Sopenharmony_ci offset = (unsigned long)buff & 7; 328c2ecf20Sopenharmony_ci /* 338c2ecf20Sopenharmony_ci * This is to all intents and purposes safe, since rounding down cannot 348c2ecf20Sopenharmony_ci * result in a different page or cache line being accessed, and @buff 358c2ecf20Sopenharmony_ci * should absolutely not be pointing to anything read-sensitive. We do, 368c2ecf20Sopenharmony_ci * however, have to be careful not to piss off KASAN, which means using 378c2ecf20Sopenharmony_ci * unchecked reads to accommodate the head and tail, for which we'll 388c2ecf20Sopenharmony_ci * compensate with an explicit check up-front. 398c2ecf20Sopenharmony_ci */ 408c2ecf20Sopenharmony_ci kasan_check_read(buff, len); 418c2ecf20Sopenharmony_ci ptr = (u64 *)(buff - offset); 428c2ecf20Sopenharmony_ci len = len + offset - 8; 438c2ecf20Sopenharmony_ci 448c2ecf20Sopenharmony_ci /* 458c2ecf20Sopenharmony_ci * Head: zero out any excess leading bytes. Shifting back by the same 468c2ecf20Sopenharmony_ci * amount should be at least as fast as any other way of handling the 478c2ecf20Sopenharmony_ci * odd/even alignment, and means we can ignore it until the very end. 488c2ecf20Sopenharmony_ci */ 498c2ecf20Sopenharmony_ci shift = offset * 8; 508c2ecf20Sopenharmony_ci data = *ptr++; 518c2ecf20Sopenharmony_ci data = (data >> shift) << shift; 528c2ecf20Sopenharmony_ci 538c2ecf20Sopenharmony_ci /* 548c2ecf20Sopenharmony_ci * Body: straightforward aligned loads from here on (the paired loads 558c2ecf20Sopenharmony_ci * underlying the quadword type still only need dword alignment). The 568c2ecf20Sopenharmony_ci * main loop strictly excludes the tail, so the second loop will always 578c2ecf20Sopenharmony_ci * run at least once. 588c2ecf20Sopenharmony_ci */ 598c2ecf20Sopenharmony_ci while (unlikely(len > 64)) { 608c2ecf20Sopenharmony_ci __uint128_t tmp1, tmp2, tmp3, tmp4; 618c2ecf20Sopenharmony_ci 628c2ecf20Sopenharmony_ci tmp1 = *(__uint128_t *)ptr; 638c2ecf20Sopenharmony_ci tmp2 = *(__uint128_t *)(ptr + 2); 648c2ecf20Sopenharmony_ci tmp3 = *(__uint128_t *)(ptr + 4); 658c2ecf20Sopenharmony_ci tmp4 = *(__uint128_t *)(ptr + 6); 668c2ecf20Sopenharmony_ci 678c2ecf20Sopenharmony_ci len -= 64; 688c2ecf20Sopenharmony_ci ptr += 8; 698c2ecf20Sopenharmony_ci 708c2ecf20Sopenharmony_ci /* This is the "don't dump the carry flag into a GPR" idiom */ 718c2ecf20Sopenharmony_ci tmp1 += (tmp1 >> 64) | (tmp1 << 64); 728c2ecf20Sopenharmony_ci tmp2 += (tmp2 >> 64) | (tmp2 << 64); 738c2ecf20Sopenharmony_ci tmp3 += (tmp3 >> 64) | (tmp3 << 64); 748c2ecf20Sopenharmony_ci tmp4 += (tmp4 >> 64) | (tmp4 << 64); 758c2ecf20Sopenharmony_ci tmp1 = ((tmp1 >> 64) << 64) | (tmp2 >> 64); 768c2ecf20Sopenharmony_ci tmp1 += (tmp1 >> 64) | (tmp1 << 64); 778c2ecf20Sopenharmony_ci tmp3 = ((tmp3 >> 64) << 64) | (tmp4 >> 64); 788c2ecf20Sopenharmony_ci tmp3 += (tmp3 >> 64) | (tmp3 << 64); 798c2ecf20Sopenharmony_ci tmp1 = ((tmp1 >> 64) << 64) | (tmp3 >> 64); 808c2ecf20Sopenharmony_ci tmp1 += (tmp1 >> 64) | (tmp1 << 64); 818c2ecf20Sopenharmony_ci tmp1 = ((tmp1 >> 64) << 64) | sum64; 828c2ecf20Sopenharmony_ci tmp1 += (tmp1 >> 64) | (tmp1 << 64); 838c2ecf20Sopenharmony_ci sum64 = tmp1 >> 64; 848c2ecf20Sopenharmony_ci } 858c2ecf20Sopenharmony_ci while (len > 8) { 868c2ecf20Sopenharmony_ci __uint128_t tmp; 878c2ecf20Sopenharmony_ci 888c2ecf20Sopenharmony_ci sum64 = accumulate(sum64, data); 898c2ecf20Sopenharmony_ci tmp = *(__uint128_t *)ptr; 908c2ecf20Sopenharmony_ci 918c2ecf20Sopenharmony_ci len -= 16; 928c2ecf20Sopenharmony_ci ptr += 2; 938c2ecf20Sopenharmony_ci 948c2ecf20Sopenharmony_ci data = tmp >> 64; 958c2ecf20Sopenharmony_ci sum64 = accumulate(sum64, tmp); 968c2ecf20Sopenharmony_ci } 978c2ecf20Sopenharmony_ci if (len > 0) { 988c2ecf20Sopenharmony_ci sum64 = accumulate(sum64, data); 998c2ecf20Sopenharmony_ci data = *ptr; 1008c2ecf20Sopenharmony_ci len -= 8; 1018c2ecf20Sopenharmony_ci } 1028c2ecf20Sopenharmony_ci /* 1038c2ecf20Sopenharmony_ci * Tail: zero any over-read bytes similarly to the head, again 1048c2ecf20Sopenharmony_ci * preserving odd/even alignment. 1058c2ecf20Sopenharmony_ci */ 1068c2ecf20Sopenharmony_ci shift = len * -8; 1078c2ecf20Sopenharmony_ci data = (data << shift) >> shift; 1088c2ecf20Sopenharmony_ci sum64 = accumulate(sum64, data); 1098c2ecf20Sopenharmony_ci 1108c2ecf20Sopenharmony_ci /* Finally, folding */ 1118c2ecf20Sopenharmony_ci sum64 += (sum64 >> 32) | (sum64 << 32); 1128c2ecf20Sopenharmony_ci sum = sum64 >> 32; 1138c2ecf20Sopenharmony_ci sum += (sum >> 16) | (sum << 16); 1148c2ecf20Sopenharmony_ci if (offset & 1) 1158c2ecf20Sopenharmony_ci return (u16)swab32(sum); 1168c2ecf20Sopenharmony_ci 1178c2ecf20Sopenharmony_ci return sum >> 16; 1188c2ecf20Sopenharmony_ci} 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci__sum16 csum_ipv6_magic(const struct in6_addr *saddr, 1218c2ecf20Sopenharmony_ci const struct in6_addr *daddr, 1228c2ecf20Sopenharmony_ci __u32 len, __u8 proto, __wsum csum) 1238c2ecf20Sopenharmony_ci{ 1248c2ecf20Sopenharmony_ci __uint128_t src, dst; 1258c2ecf20Sopenharmony_ci u64 sum = (__force u64)csum; 1268c2ecf20Sopenharmony_ci 1278c2ecf20Sopenharmony_ci src = *(const __uint128_t *)saddr->s6_addr; 1288c2ecf20Sopenharmony_ci dst = *(const __uint128_t *)daddr->s6_addr; 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_ci sum += (__force u32)htonl(len); 1318c2ecf20Sopenharmony_ci sum += (u32)proto << 24; 1328c2ecf20Sopenharmony_ci src += (src >> 64) | (src << 64); 1338c2ecf20Sopenharmony_ci dst += (dst >> 64) | (dst << 64); 1348c2ecf20Sopenharmony_ci 1358c2ecf20Sopenharmony_ci sum = accumulate(sum, src >> 64); 1368c2ecf20Sopenharmony_ci sum = accumulate(sum, dst >> 64); 1378c2ecf20Sopenharmony_ci 1388c2ecf20Sopenharmony_ci sum += ((sum >> 32) | (sum << 32)); 1398c2ecf20Sopenharmony_ci return csum_fold((__force __wsum)(sum >> 32)); 1408c2ecf20Sopenharmony_ci} 1418c2ecf20Sopenharmony_ciEXPORT_SYMBOL(csum_ipv6_magic); 142