11cb0ef41Sopenharmony_ci/* adler32.c -- compute the Adler-32 checksum of a data stream 21cb0ef41Sopenharmony_ci * Copyright (C) 1995-2011, 2016 Mark Adler 31cb0ef41Sopenharmony_ci * For conditions of distribution and use, see copyright notice in zlib.h 41cb0ef41Sopenharmony_ci */ 51cb0ef41Sopenharmony_ci 61cb0ef41Sopenharmony_ci/* @(#) $Id$ */ 71cb0ef41Sopenharmony_ci 81cb0ef41Sopenharmony_ci#include "zutil.h" 91cb0ef41Sopenharmony_ci 101cb0ef41Sopenharmony_ci#define BASE 65521U /* largest prime smaller than 65536 */ 111cb0ef41Sopenharmony_ci#define NMAX 5552 121cb0ef41Sopenharmony_ci/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ 131cb0ef41Sopenharmony_ci 141cb0ef41Sopenharmony_ci#define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;} 151cb0ef41Sopenharmony_ci#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); 161cb0ef41Sopenharmony_ci#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); 171cb0ef41Sopenharmony_ci#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); 181cb0ef41Sopenharmony_ci#define DO16(buf) DO8(buf,0); DO8(buf,8); 191cb0ef41Sopenharmony_ci 201cb0ef41Sopenharmony_ci/* use NO_DIVIDE if your processor does not do division in hardware -- 211cb0ef41Sopenharmony_ci try it both ways to see which is faster */ 221cb0ef41Sopenharmony_ci#ifdef NO_DIVIDE 231cb0ef41Sopenharmony_ci/* note that this assumes BASE is 65521, where 65536 % 65521 == 15 241cb0ef41Sopenharmony_ci (thank you to John Reiser for pointing this out) */ 251cb0ef41Sopenharmony_ci# define CHOP(a) \ 261cb0ef41Sopenharmony_ci do { \ 271cb0ef41Sopenharmony_ci unsigned long tmp = a >> 16; \ 281cb0ef41Sopenharmony_ci a &= 0xffffUL; \ 291cb0ef41Sopenharmony_ci a += (tmp << 4) - tmp; \ 301cb0ef41Sopenharmony_ci } while (0) 311cb0ef41Sopenharmony_ci# define MOD28(a) \ 321cb0ef41Sopenharmony_ci do { \ 331cb0ef41Sopenharmony_ci CHOP(a); \ 341cb0ef41Sopenharmony_ci if (a >= BASE) a -= BASE; \ 351cb0ef41Sopenharmony_ci } while (0) 361cb0ef41Sopenharmony_ci# define MOD(a) \ 371cb0ef41Sopenharmony_ci do { \ 381cb0ef41Sopenharmony_ci CHOP(a); \ 391cb0ef41Sopenharmony_ci MOD28(a); \ 401cb0ef41Sopenharmony_ci } while (0) 411cb0ef41Sopenharmony_ci# define MOD63(a) \ 421cb0ef41Sopenharmony_ci do { /* this assumes a is not negative */ \ 431cb0ef41Sopenharmony_ci z_off64_t tmp = a >> 32; \ 441cb0ef41Sopenharmony_ci a &= 0xffffffffL; \ 451cb0ef41Sopenharmony_ci a += (tmp << 8) - (tmp << 5) + tmp; \ 461cb0ef41Sopenharmony_ci tmp = a >> 16; \ 471cb0ef41Sopenharmony_ci a &= 0xffffL; \ 481cb0ef41Sopenharmony_ci a += (tmp << 4) - tmp; \ 491cb0ef41Sopenharmony_ci tmp = a >> 16; \ 501cb0ef41Sopenharmony_ci a &= 0xffffL; \ 511cb0ef41Sopenharmony_ci a += (tmp << 4) - tmp; \ 521cb0ef41Sopenharmony_ci if (a >= BASE) a -= BASE; \ 531cb0ef41Sopenharmony_ci } while (0) 541cb0ef41Sopenharmony_ci#else 551cb0ef41Sopenharmony_ci# define MOD(a) a %= BASE 561cb0ef41Sopenharmony_ci# define MOD28(a) a %= BASE 571cb0ef41Sopenharmony_ci# define MOD63(a) a %= BASE 581cb0ef41Sopenharmony_ci#endif 591cb0ef41Sopenharmony_ci 601cb0ef41Sopenharmony_ci#include "cpu_features.h" 611cb0ef41Sopenharmony_ci#if defined(ADLER32_SIMD_SSSE3) || defined(ADLER32_SIMD_NEON) 621cb0ef41Sopenharmony_ci#include "adler32_simd.h" 631cb0ef41Sopenharmony_ci#endif 641cb0ef41Sopenharmony_ci 651cb0ef41Sopenharmony_ci/* ========================================================================= */ 661cb0ef41Sopenharmony_ciuLong ZEXPORT adler32_z(uLong adler, const Bytef *buf, z_size_t len) { 671cb0ef41Sopenharmony_ci unsigned long sum2; 681cb0ef41Sopenharmony_ci unsigned n; 691cb0ef41Sopenharmony_ci 701cb0ef41Sopenharmony_ci#if defined(ADLER32_SIMD_SSSE3) 711cb0ef41Sopenharmony_ci if (buf != Z_NULL && len >= 64 && x86_cpu_enable_ssse3) 721cb0ef41Sopenharmony_ci return adler32_simd_(adler, buf, len); 731cb0ef41Sopenharmony_ci#elif defined(ADLER32_SIMD_NEON) 741cb0ef41Sopenharmony_ci if (buf != Z_NULL && len >= 64) 751cb0ef41Sopenharmony_ci return adler32_simd_(adler, buf, len); 761cb0ef41Sopenharmony_ci#endif 771cb0ef41Sopenharmony_ci 781cb0ef41Sopenharmony_ci /* split Adler-32 into component sums */ 791cb0ef41Sopenharmony_ci sum2 = (adler >> 16) & 0xffff; 801cb0ef41Sopenharmony_ci adler &= 0xffff; 811cb0ef41Sopenharmony_ci 821cb0ef41Sopenharmony_ci /* in case user likes doing a byte at a time, keep it fast */ 831cb0ef41Sopenharmony_ci if (len == 1) { 841cb0ef41Sopenharmony_ci adler += buf[0]; 851cb0ef41Sopenharmony_ci if (adler >= BASE) 861cb0ef41Sopenharmony_ci adler -= BASE; 871cb0ef41Sopenharmony_ci sum2 += adler; 881cb0ef41Sopenharmony_ci if (sum2 >= BASE) 891cb0ef41Sopenharmony_ci sum2 -= BASE; 901cb0ef41Sopenharmony_ci return adler | (sum2 << 16); 911cb0ef41Sopenharmony_ci } 921cb0ef41Sopenharmony_ci 931cb0ef41Sopenharmony_ci#if defined(ADLER32_SIMD_SSSE3) || defined(ADLER32_SIMD_NEON) 941cb0ef41Sopenharmony_ci /* 951cb0ef41Sopenharmony_ci * Use SIMD to compute the adler32. Since this function can be 961cb0ef41Sopenharmony_ci * freely used, check CPU features here. zlib convention is to 971cb0ef41Sopenharmony_ci * call adler32(0, NULL, 0), before making calls to adler32(). 981cb0ef41Sopenharmony_ci * So this is a good early (and infrequent) place to cache CPU 991cb0ef41Sopenharmony_ci * features for those later, more interesting adler32() calls. 1001cb0ef41Sopenharmony_ci */ 1011cb0ef41Sopenharmony_ci if (buf == Z_NULL) { 1021cb0ef41Sopenharmony_ci if (!len) /* Assume user is calling adler32(0, NULL, 0); */ 1031cb0ef41Sopenharmony_ci cpu_check_features(); 1041cb0ef41Sopenharmony_ci return 1L; 1051cb0ef41Sopenharmony_ci } 1061cb0ef41Sopenharmony_ci#else 1071cb0ef41Sopenharmony_ci /* initial Adler-32 value (deferred check for len == 1 speed) */ 1081cb0ef41Sopenharmony_ci if (buf == Z_NULL) 1091cb0ef41Sopenharmony_ci return 1L; 1101cb0ef41Sopenharmony_ci#endif 1111cb0ef41Sopenharmony_ci 1121cb0ef41Sopenharmony_ci /* in case short lengths are provided, keep it somewhat fast */ 1131cb0ef41Sopenharmony_ci if (len < 16) { 1141cb0ef41Sopenharmony_ci while (len--) { 1151cb0ef41Sopenharmony_ci adler += *buf++; 1161cb0ef41Sopenharmony_ci sum2 += adler; 1171cb0ef41Sopenharmony_ci } 1181cb0ef41Sopenharmony_ci if (adler >= BASE) 1191cb0ef41Sopenharmony_ci adler -= BASE; 1201cb0ef41Sopenharmony_ci MOD28(sum2); /* only added so many BASE's */ 1211cb0ef41Sopenharmony_ci return adler | (sum2 << 16); 1221cb0ef41Sopenharmony_ci } 1231cb0ef41Sopenharmony_ci 1241cb0ef41Sopenharmony_ci /* do length NMAX blocks -- requires just one modulo operation */ 1251cb0ef41Sopenharmony_ci while (len >= NMAX) { 1261cb0ef41Sopenharmony_ci len -= NMAX; 1271cb0ef41Sopenharmony_ci n = NMAX / 16; /* NMAX is divisible by 16 */ 1281cb0ef41Sopenharmony_ci do { 1291cb0ef41Sopenharmony_ci DO16(buf); /* 16 sums unrolled */ 1301cb0ef41Sopenharmony_ci buf += 16; 1311cb0ef41Sopenharmony_ci } while (--n); 1321cb0ef41Sopenharmony_ci MOD(adler); 1331cb0ef41Sopenharmony_ci MOD(sum2); 1341cb0ef41Sopenharmony_ci } 1351cb0ef41Sopenharmony_ci 1361cb0ef41Sopenharmony_ci /* do remaining bytes (less than NMAX, still just one modulo) */ 1371cb0ef41Sopenharmony_ci if (len) { /* avoid modulos if none remaining */ 1381cb0ef41Sopenharmony_ci while (len >= 16) { 1391cb0ef41Sopenharmony_ci len -= 16; 1401cb0ef41Sopenharmony_ci DO16(buf); 1411cb0ef41Sopenharmony_ci buf += 16; 1421cb0ef41Sopenharmony_ci } 1431cb0ef41Sopenharmony_ci while (len--) { 1441cb0ef41Sopenharmony_ci adler += *buf++; 1451cb0ef41Sopenharmony_ci sum2 += adler; 1461cb0ef41Sopenharmony_ci } 1471cb0ef41Sopenharmony_ci MOD(adler); 1481cb0ef41Sopenharmony_ci MOD(sum2); 1491cb0ef41Sopenharmony_ci } 1501cb0ef41Sopenharmony_ci 1511cb0ef41Sopenharmony_ci /* return recombined sums */ 1521cb0ef41Sopenharmony_ci return adler | (sum2 << 16); 1531cb0ef41Sopenharmony_ci} 1541cb0ef41Sopenharmony_ci 1551cb0ef41Sopenharmony_ci/* ========================================================================= */ 1561cb0ef41Sopenharmony_ciuLong ZEXPORT adler32(uLong adler, const Bytef *buf, uInt len) { 1571cb0ef41Sopenharmony_ci return adler32_z(adler, buf, len); 1581cb0ef41Sopenharmony_ci} 1591cb0ef41Sopenharmony_ci 1601cb0ef41Sopenharmony_ci/* ========================================================================= */ 1611cb0ef41Sopenharmony_cilocal uLong adler32_combine_(uLong adler1, uLong adler2, z_off64_t len2) { 1621cb0ef41Sopenharmony_ci unsigned long sum1; 1631cb0ef41Sopenharmony_ci unsigned long sum2; 1641cb0ef41Sopenharmony_ci unsigned rem; 1651cb0ef41Sopenharmony_ci 1661cb0ef41Sopenharmony_ci /* for negative len, return invalid adler32 as a clue for debugging */ 1671cb0ef41Sopenharmony_ci if (len2 < 0) 1681cb0ef41Sopenharmony_ci return 0xffffffffUL; 1691cb0ef41Sopenharmony_ci 1701cb0ef41Sopenharmony_ci /* the derivation of this formula is left as an exercise for the reader */ 1711cb0ef41Sopenharmony_ci MOD63(len2); /* assumes len2 >= 0 */ 1721cb0ef41Sopenharmony_ci rem = (unsigned)len2; 1731cb0ef41Sopenharmony_ci sum1 = adler1 & 0xffff; 1741cb0ef41Sopenharmony_ci sum2 = rem * sum1; 1751cb0ef41Sopenharmony_ci MOD(sum2); 1761cb0ef41Sopenharmony_ci sum1 += (adler2 & 0xffff) + BASE - 1; 1771cb0ef41Sopenharmony_ci sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem; 1781cb0ef41Sopenharmony_ci if (sum1 >= BASE) sum1 -= BASE; 1791cb0ef41Sopenharmony_ci if (sum1 >= BASE) sum1 -= BASE; 1801cb0ef41Sopenharmony_ci if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1); 1811cb0ef41Sopenharmony_ci if (sum2 >= BASE) sum2 -= BASE; 1821cb0ef41Sopenharmony_ci return sum1 | (sum2 << 16); 1831cb0ef41Sopenharmony_ci} 1841cb0ef41Sopenharmony_ci 1851cb0ef41Sopenharmony_ci/* ========================================================================= */ 1861cb0ef41Sopenharmony_ciuLong ZEXPORT adler32_combine(uLong adler1, uLong adler2, z_off_t len2) { 1871cb0ef41Sopenharmony_ci return adler32_combine_(adler1, adler2, len2); 1881cb0ef41Sopenharmony_ci} 1891cb0ef41Sopenharmony_ci 1901cb0ef41Sopenharmony_ciuLong ZEXPORT adler32_combine64(uLong adler1, uLong adler2, z_off64_t len2) { 1911cb0ef41Sopenharmony_ci return adler32_combine_(adler1, adler2, len2); 1921cb0ef41Sopenharmony_ci} 193