1/* adler32.c -- compute the Adler-32 checksum of a data stream 2 * Copyright (C) 1995-2011, 2016 Mark Adler 3 * For conditions of distribution and use, see copyright notice in zlib.h 4 */ 5 6/* @(#) $Id$ */ 7 8#include "zutil.h" 9 10#define BASE 65521U /* largest prime smaller than 65536 */ 11#define NMAX 5552 12/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ 13 14#define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;} 15#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); 16#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); 17#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); 18#define DO16(buf) DO8(buf,0); DO8(buf,8); 19 20/* use NO_DIVIDE if your processor does not do division in hardware -- 21 try it both ways to see which is faster */ 22#ifdef NO_DIVIDE 23/* note that this assumes BASE is 65521, where 65536 % 65521 == 15 24 (thank you to John Reiser for pointing this out) */ 25# define CHOP(a) \ 26 do { \ 27 unsigned long tmp = a >> 16; \ 28 a &= 0xffffUL; \ 29 a += (tmp << 4) - tmp; \ 30 } while (0) 31# define MOD28(a) \ 32 do { \ 33 CHOP(a); \ 34 if (a >= BASE) a -= BASE; \ 35 } while (0) 36# define MOD(a) \ 37 do { \ 38 CHOP(a); \ 39 MOD28(a); \ 40 } while (0) 41# define MOD63(a) \ 42 do { /* this assumes a is not negative */ \ 43 z_off64_t tmp = a >> 32; \ 44 a &= 0xffffffffL; \ 45 a += (tmp << 8) - (tmp << 5) + tmp; \ 46 tmp = a >> 16; \ 47 a &= 0xffffL; \ 48 a += (tmp << 4) - tmp; \ 49 tmp = a >> 16; \ 50 a &= 0xffffL; \ 51 a += (tmp << 4) - tmp; \ 52 if (a >= BASE) a -= BASE; \ 53 } while (0) 54#else 55# define MOD(a) a %= BASE 56# define MOD28(a) a %= BASE 57# define MOD63(a) a %= BASE 58#endif 59 60#include "cpu_features.h" 61#if defined(ADLER32_SIMD_SSSE3) || defined(ADLER32_SIMD_NEON) 62#include "adler32_simd.h" 63#endif 64 65/* ========================================================================= */ 66uLong ZEXPORT adler32_z(uLong adler, const Bytef *buf, z_size_t len) { 67 unsigned long sum2; 68 unsigned n; 69 70#if defined(ADLER32_SIMD_SSSE3) 71 if (buf != Z_NULL && len >= 64 && x86_cpu_enable_ssse3) 72 return adler32_simd_(adler, buf, len); 73#elif defined(ADLER32_SIMD_NEON) 74 if (buf != Z_NULL && len >= 64) 75 return adler32_simd_(adler, buf, len); 76#endif 77 78 /* split Adler-32 into component sums */ 79 sum2 = (adler >> 16) & 0xffff; 80 adler &= 0xffff; 81 82 /* in case user likes doing a byte at a time, keep it fast */ 83 if (len == 1) { 84 adler += buf[0]; 85 if (adler >= BASE) 86 adler -= BASE; 87 sum2 += adler; 88 if (sum2 >= BASE) 89 sum2 -= BASE; 90 return adler | (sum2 << 16); 91 } 92 93#if defined(ADLER32_SIMD_SSSE3) || defined(ADLER32_SIMD_NEON) 94 /* 95 * Use SIMD to compute the adler32. Since this function can be 96 * freely used, check CPU features here. zlib convention is to 97 * call adler32(0, NULL, 0), before making calls to adler32(). 98 * So this is a good early (and infrequent) place to cache CPU 99 * features for those later, more interesting adler32() calls. 100 */ 101 if (buf == Z_NULL) { 102 if (!len) /* Assume user is calling adler32(0, NULL, 0); */ 103 cpu_check_features(); 104 return 1L; 105 } 106#else 107 /* initial Adler-32 value (deferred check for len == 1 speed) */ 108 if (buf == Z_NULL) 109 return 1L; 110#endif 111 112 /* in case short lengths are provided, keep it somewhat fast */ 113 if (len < 16) { 114 while (len--) { 115 adler += *buf++; 116 sum2 += adler; 117 } 118 if (adler >= BASE) 119 adler -= BASE; 120 MOD28(sum2); /* only added so many BASE's */ 121 return adler | (sum2 << 16); 122 } 123 124 /* do length NMAX blocks -- requires just one modulo operation */ 125 while (len >= NMAX) { 126 len -= NMAX; 127 n = NMAX / 16; /* NMAX is divisible by 16 */ 128 do { 129 DO16(buf); /* 16 sums unrolled */ 130 buf += 16; 131 } while (--n); 132 MOD(adler); 133 MOD(sum2); 134 } 135 136 /* do remaining bytes (less than NMAX, still just one modulo) */ 137 if (len) { /* avoid modulos if none remaining */ 138 while (len >= 16) { 139 len -= 16; 140 DO16(buf); 141 buf += 16; 142 } 143 while (len--) { 144 adler += *buf++; 145 sum2 += adler; 146 } 147 MOD(adler); 148 MOD(sum2); 149 } 150 151 /* return recombined sums */ 152 return adler | (sum2 << 16); 153} 154 155/* ========================================================================= */ 156uLong ZEXPORT adler32(uLong adler, const Bytef *buf, uInt len) { 157 return adler32_z(adler, buf, len); 158} 159 160/* ========================================================================= */ 161local uLong adler32_combine_(uLong adler1, uLong adler2, z_off64_t len2) { 162 unsigned long sum1; 163 unsigned long sum2; 164 unsigned rem; 165 166 /* for negative len, return invalid adler32 as a clue for debugging */ 167 if (len2 < 0) 168 return 0xffffffffUL; 169 170 /* the derivation of this formula is left as an exercise for the reader */ 171 MOD63(len2); /* assumes len2 >= 0 */ 172 rem = (unsigned)len2; 173 sum1 = adler1 & 0xffff; 174 sum2 = rem * sum1; 175 MOD(sum2); 176 sum1 += (adler2 & 0xffff) + BASE - 1; 177 sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem; 178 if (sum1 >= BASE) sum1 -= BASE; 179 if (sum1 >= BASE) sum1 -= BASE; 180 if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1); 181 if (sum2 >= BASE) sum2 -= BASE; 182 return sum1 | (sum2 << 16); 183} 184 185/* ========================================================================= */ 186uLong ZEXPORT adler32_combine(uLong adler1, uLong adler2, z_off_t len2) { 187 return adler32_combine_(adler1, adler2, len2); 188} 189 190uLong ZEXPORT adler32_combine64(uLong adler1, uLong adler2, z_off64_t len2) { 191 return adler32_combine_(adler1, adler2, len2); 192} 193