optimized-routines/networking/chksum.c

/*
 * Compute 16-bit sum in ones' complement arithmetic (with end-around carry).
 * This sum is often used as a simple checksum in networking.
 *
 * Copyright (c) 2020, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
bbbf1280Sopenharmony_ci
bbbf1280Sopenharmony_ci#include "networking.h"
bbbf1280Sopenharmony_ci#include "chksum_common.h"
bbbf1280Sopenharmony_ci
bbbf1280Sopenharmony_cialways_inline
bbbf1280Sopenharmony_cistatic inline uint32_t
bbbf1280Sopenharmony_cislurp_head32(const void **pptr, uint32_t *nbytes)
bbbf1280Sopenharmony_ci{
bbbf1280Sopenharmony_ci    uint32_t sum = 0;
bbbf1280Sopenharmony_ci    Assert(*nbytes >= 4);
bbbf1280Sopenharmony_ci    uint32_t off = (uintptr_t) *pptr % 4;
bbbf1280Sopenharmony_ci    if (likely(off != 0))
bbbf1280Sopenharmony_ci    {
bbbf1280Sopenharmony_ci	/* Get rid of bytes 0..off-1 */
bbbf1280Sopenharmony_ci	const unsigned char *ptr32 = align_ptr(*pptr, 4);
bbbf1280Sopenharmony_ci	uint32_t mask = ~0U << (CHAR_BIT * off);
bbbf1280Sopenharmony_ci	sum = load32(ptr32) & mask;
bbbf1280Sopenharmony_ci	*pptr = ptr32 + 4;
bbbf1280Sopenharmony_ci	*nbytes -= 4 - off;
bbbf1280Sopenharmony_ci    }
bbbf1280Sopenharmony_ci    return sum;
bbbf1280Sopenharmony_ci}
bbbf1280Sopenharmony_ci
/*
 * Scalar implementation of the 16-bit ones' complement sum over a buffer.
 *
 * ptr     start of the data; no particular alignment is required.
 * nbytes  number of bytes to sum.
 *
 * The data is accumulated as 32-bit words into a 64-bit total (which cannot
 * overflow for a 32-bit byte count) and handed to fold_and_swap() for the
 * final reduction to 16 bits.
 *
 * NOTE(review): fold_and_swap() is defined outside this file; it presumably
 * folds the carries back in and byte-swaps the result when the buffer began
 * at an odd address - confirm in chksum_common.h.
 *
 * Additional loop unrolling would help when not auto-vectorizing.
 */
unsigned short
__chksum(const void *ptr, unsigned int nbytes)
{
    uint64_t acc = 0;
    bool odd_start = false;

    /* Aligning the pointer only pays off for larger buffers; for short ones
       the overhead outweighs the benefit, so they are read as-is. */
    if (nbytes > 300)
    {
        odd_start = (uintptr_t) ptr & 1;
        acc = slurp_head32(&ptr, &nbytes);
    }

    const char *pos = ptr;

    /* Bulk phase: four 32-bit words (16 bytes) per iteration. */
    const uint32_t nblocks = nbytes / 16;
    for (uint32_t i = 0; i < nblocks; i++, pos += 16)
    {
        const uint64_t w0 = load32(pos);
        const uint64_t w1 = load32(pos + 4);
        const uint64_t w2 = load32(pos + 8);
        const uint64_t w3 = load32(pos + 12);
        acc += w0 + w1 + w2 + w3;
    }
    nbytes %= 16;
    Assert(nbytes < 16);

    /* Up to three remaining whole 32-bit words. */
    for (; nbytes >= 4; nbytes -= 4, pos += 4)
    {
        acc += load32(pos);
    }
    Assert(nbytes < 4);

    /* Remaining 0..3 bytes: an optional halfword, then an optional byte. */
    if ((nbytes & 2) != 0)
    {
        acc += load16(pos);
        pos += 2;
    }
    if ((nbytes & 1) != 0)
    {
        acc += *(const uint8_t *) pos;
    }

    return fold_and_swap(acc, odd_start);
}