1bbbf1280Sopenharmony_ci/* 2bbbf1280Sopenharmony_ci * Compute 16-bit sum in ones' complement arithmetic (with end-around carry). 3bbbf1280Sopenharmony_ci * This sum is often used as a simple checksum in networking. 4bbbf1280Sopenharmony_ci * 5bbbf1280Sopenharmony_ci * Copyright (c) 2020, Arm Limited. 6bbbf1280Sopenharmony_ci * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 7bbbf1280Sopenharmony_ci */ 8bbbf1280Sopenharmony_ci 9bbbf1280Sopenharmony_ci#include "networking.h" 10bbbf1280Sopenharmony_ci#include "chksum_common.h" 11bbbf1280Sopenharmony_ci 12bbbf1280Sopenharmony_cialways_inline 13bbbf1280Sopenharmony_cistatic inline uint32_t 14bbbf1280Sopenharmony_cislurp_head32(const void **pptr, uint32_t *nbytes) 15bbbf1280Sopenharmony_ci{ 16bbbf1280Sopenharmony_ci uint32_t sum = 0; 17bbbf1280Sopenharmony_ci Assert(*nbytes >= 4); 18bbbf1280Sopenharmony_ci uint32_t off = (uintptr_t) *pptr % 4; 19bbbf1280Sopenharmony_ci if (likely(off != 0)) 20bbbf1280Sopenharmony_ci { 21bbbf1280Sopenharmony_ci /* Get rid of bytes 0..off-1 */ 22bbbf1280Sopenharmony_ci const unsigned char *ptr32 = align_ptr(*pptr, 4); 23bbbf1280Sopenharmony_ci uint32_t mask = ~0U << (CHAR_BIT * off); 24bbbf1280Sopenharmony_ci sum = load32(ptr32) & mask; 25bbbf1280Sopenharmony_ci *pptr = ptr32 + 4; 26bbbf1280Sopenharmony_ci *nbytes -= 4 - off; 27bbbf1280Sopenharmony_ci } 28bbbf1280Sopenharmony_ci return sum; 29bbbf1280Sopenharmony_ci} 30bbbf1280Sopenharmony_ci 31bbbf1280Sopenharmony_ci/* Additional loop unrolling would help when not auto-vectorizing */ 32bbbf1280Sopenharmony_ciunsigned short 33bbbf1280Sopenharmony_ci__chksum(const void *ptr, unsigned int nbytes) 34bbbf1280Sopenharmony_ci{ 35bbbf1280Sopenharmony_ci bool swap = false; 36bbbf1280Sopenharmony_ci uint64_t sum = 0; 37bbbf1280Sopenharmony_ci 38bbbf1280Sopenharmony_ci if (nbytes > 300) 39bbbf1280Sopenharmony_ci { 40bbbf1280Sopenharmony_ci /* 4-byte align pointer */ 41bbbf1280Sopenharmony_ci swap = (uintptr_t) ptr & 1; 42bbbf1280Sopenharmony_ci sum = slurp_head32(&ptr, &nbytes); 43bbbf1280Sopenharmony_ci } 44bbbf1280Sopenharmony_ci /* Else benefit of aligning not worth the overhead */ 45bbbf1280Sopenharmony_ci 46bbbf1280Sopenharmony_ci /* Sum all 16-byte chunks */ 47bbbf1280Sopenharmony_ci const char *cptr = ptr; 48bbbf1280Sopenharmony_ci for (uint32_t nquads = nbytes / 16; nquads != 0; nquads--) 49bbbf1280Sopenharmony_ci { 50bbbf1280Sopenharmony_ci uint64_t h0 = load32(cptr + 0); 51bbbf1280Sopenharmony_ci uint64_t h1 = load32(cptr + 4); 52bbbf1280Sopenharmony_ci uint64_t h2 = load32(cptr + 8); 53bbbf1280Sopenharmony_ci uint64_t h3 = load32(cptr + 12); 54bbbf1280Sopenharmony_ci sum += h0 + h1 + h2 + h3; 55bbbf1280Sopenharmony_ci cptr += 16; 56bbbf1280Sopenharmony_ci } 57bbbf1280Sopenharmony_ci nbytes %= 16; 58bbbf1280Sopenharmony_ci Assert(nbytes < 16); 59bbbf1280Sopenharmony_ci 60bbbf1280Sopenharmony_ci /* Handle any trailing 4-byte chunks */ 61bbbf1280Sopenharmony_ci while (nbytes >= 4) 62bbbf1280Sopenharmony_ci { 63bbbf1280Sopenharmony_ci sum += load32(cptr); 64bbbf1280Sopenharmony_ci cptr += 4; 65bbbf1280Sopenharmony_ci nbytes -= 4; 66bbbf1280Sopenharmony_ci } 67bbbf1280Sopenharmony_ci Assert(nbytes < 4); 68bbbf1280Sopenharmony_ci 69bbbf1280Sopenharmony_ci if (nbytes & 2) 70bbbf1280Sopenharmony_ci { 71bbbf1280Sopenharmony_ci sum += load16(cptr); 72bbbf1280Sopenharmony_ci cptr += 2; 73bbbf1280Sopenharmony_ci } 74bbbf1280Sopenharmony_ci 75bbbf1280Sopenharmony_ci if (nbytes & 1) 76bbbf1280Sopenharmony_ci { 77bbbf1280Sopenharmony_ci sum += *(uint8_t *)cptr; 78bbbf1280Sopenharmony_ci } 79bbbf1280Sopenharmony_ci 80bbbf1280Sopenharmony_ci return fold_and_swap(sum, swap); 81bbbf1280Sopenharmony_ci} 82