/*
 * Compute 16-bit sum in ones' complement arithmetic (with end-around carry).
 * This sum is often used as a simple checksum in networking.
 *
 * Copyright (c) 2020, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
8bbbf1280Sopenharmony_ci
9bbbf1280Sopenharmony_ci#include "networking.h"
10bbbf1280Sopenharmony_ci#include "chksum_common.h"
11bbbf1280Sopenharmony_ci
12bbbf1280Sopenharmony_cialways_inline
13bbbf1280Sopenharmony_cistatic inline uint32_t
14bbbf1280Sopenharmony_cislurp_head32(const void **pptr, uint32_t *nbytes)
15bbbf1280Sopenharmony_ci{
16bbbf1280Sopenharmony_ci    uint32_t sum = 0;
17bbbf1280Sopenharmony_ci    Assert(*nbytes >= 4);
18bbbf1280Sopenharmony_ci    uint32_t off = (uintptr_t) *pptr % 4;
19bbbf1280Sopenharmony_ci    if (likely(off != 0))
20bbbf1280Sopenharmony_ci    {
21bbbf1280Sopenharmony_ci	/* Get rid of bytes 0..off-1 */
22bbbf1280Sopenharmony_ci	const unsigned char *ptr32 = align_ptr(*pptr, 4);
23bbbf1280Sopenharmony_ci	uint32_t mask = ~0U << (CHAR_BIT * off);
24bbbf1280Sopenharmony_ci	sum = load32(ptr32) & mask;
25bbbf1280Sopenharmony_ci	*pptr = ptr32 + 4;
26bbbf1280Sopenharmony_ci	*nbytes -= 4 - off;
27bbbf1280Sopenharmony_ci    }
28bbbf1280Sopenharmony_ci    return sum;
29bbbf1280Sopenharmony_ci}
30bbbf1280Sopenharmony_ci
/*
 * Compute the 16-bit ones' complement sum of a byte buffer.
 *
 * ptr    - start of the data; no alignment is required.
 * nbytes - number of bytes to sum.
 *
 * The data is accumulated 32 bits at a time into a 64-bit total, which is
 * handed to fold_and_swap() together with a flag recording whether the
 * aligned path started on an odd address (presumably so the folded result
 * can be byte-swapped back - see chksum_common.h).
 *
 * Additional loop unrolling would help when not auto-vectorizing.
 *
 * NOTE(review): the final odd byte is added unshifted, which looks like a
 * little-endian assumption - confirm for big-endian targets.
 */
unsigned short
__chksum(const void *ptr, unsigned int nbytes)
{
    uint64_t acc = 0;
    bool odd_start = false;

    /* For short inputs the benefit of aligning is not worth the overhead */
    if (nbytes > 300)
    {
	/* Remember the address parity before the pointer is 4-byte aligned */
	odd_start = (uintptr_t) ptr & 1;
	acc = slurp_head32(&ptr, &nbytes);
    }

    /* Bulk: four 32-bit words (16 bytes) per iteration */
    const char *p = ptr;
    for (uint32_t left = nbytes / 16; left > 0; --left)
    {
	/* Widen before adding so the 32-bit carries are not lost */
	const uint64_t lo = (uint64_t) load32(p + 0) + load32(p + 4);
	const uint64_t hi = (uint64_t) load32(p + 8) + load32(p + 12);
	acc += lo + hi;
	p += 16;
    }
    nbytes &= 15;
    Assert(nbytes < 16);

    /* Up to three whole 32-bit words may remain */
    for (; nbytes >= 4; nbytes -= 4, p += 4)
    {
	acc += load32(p);
    }
    Assert(nbytes < 4);

    /* Remaining 0..3 bytes: an optional halfword, then an optional byte */
    if (nbytes & 2)
    {
	acc += load16(p);
	p += 2;
    }

    if (nbytes & 1)
    {
	acc += *(const uint8_t *) p;
    }

    return fold_and_swap(acc, odd_start);
}
82