xref: /third_party/node/deps/zlib/adler32.c (revision 1cb0ef41)
1/* adler32.c -- compute the Adler-32 checksum of a data stream
2 * Copyright (C) 1995-2011, 2016 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6/* @(#) $Id$ */
7
8#include "zutil.h"
9
10#define BASE 65521U     /* largest prime smaller than 65536 */
11#define NMAX 5552
12/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
13
14#define DO1(buf,i)  {adler += (buf)[i]; sum2 += adler;}
15#define DO2(buf,i)  DO1(buf,i); DO1(buf,i+1);
16#define DO4(buf,i)  DO2(buf,i); DO2(buf,i+2);
17#define DO8(buf,i)  DO4(buf,i); DO4(buf,i+4);
18#define DO16(buf)   DO8(buf,0); DO8(buf,8);
19
20/* use NO_DIVIDE if your processor does not do division in hardware --
21   try it both ways to see which is faster */
22#ifdef NO_DIVIDE
23/* note that this assumes BASE is 65521, where 65536 % 65521 == 15
24   (thank you to John Reiser for pointing this out) */
25#  define CHOP(a) \
26    do { \
27        unsigned long tmp = a >> 16; \
28        a &= 0xffffUL; \
29        a += (tmp << 4) - tmp; \
30    } while (0)
31#  define MOD28(a) \
32    do { \
33        CHOP(a); \
34        if (a >= BASE) a -= BASE; \
35    } while (0)
36#  define MOD(a) \
37    do { \
38        CHOP(a); \
39        MOD28(a); \
40    } while (0)
41#  define MOD63(a) \
42    do { /* this assumes a is not negative */ \
43        z_off64_t tmp = a >> 32; \
44        a &= 0xffffffffL; \
45        a += (tmp << 8) - (tmp << 5) + tmp; \
46        tmp = a >> 16; \
47        a &= 0xffffL; \
48        a += (tmp << 4) - tmp; \
49        tmp = a >> 16; \
50        a &= 0xffffL; \
51        a += (tmp << 4) - tmp; \
52        if (a >= BASE) a -= BASE; \
53    } while (0)
54#else
55#  define MOD(a) a %= BASE
56#  define MOD28(a) a %= BASE
57#  define MOD63(a) a %= BASE
58#endif
59
60#include "cpu_features.h"
61#if defined(ADLER32_SIMD_SSSE3) || defined(ADLER32_SIMD_NEON)
62#include "adler32_simd.h"
63#endif
64
65/* ========================================================================= */
66uLong ZEXPORT adler32_z(uLong adler, const Bytef *buf, z_size_t len) {
67    unsigned long sum2;
68    unsigned n;
69
70#if defined(ADLER32_SIMD_SSSE3)
71    if (buf != Z_NULL && len >= 64 && x86_cpu_enable_ssse3)
72        return adler32_simd_(adler, buf, len);
73#elif defined(ADLER32_SIMD_NEON)
74    if (buf != Z_NULL && len >= 64)
75        return adler32_simd_(adler, buf, len);
76#endif
77
78    /* split Adler-32 into component sums */
79    sum2 = (adler >> 16) & 0xffff;
80    adler &= 0xffff;
81
82    /* in case user likes doing a byte at a time, keep it fast */
83    if (len == 1) {
84        adler += buf[0];
85        if (adler >= BASE)
86            adler -= BASE;
87        sum2 += adler;
88        if (sum2 >= BASE)
89            sum2 -= BASE;
90        return adler | (sum2 << 16);
91    }
92
93#if defined(ADLER32_SIMD_SSSE3) || defined(ADLER32_SIMD_NEON)
94    /*
95     * Use SIMD to compute the adler32. Since this function can be
96     * freely used, check CPU features here. zlib convention is to
97     * call adler32(0, NULL, 0), before making calls to adler32().
98     * So this is a good early (and infrequent) place to cache CPU
99     * features for those later, more interesting adler32() calls.
100     */
101    if (buf == Z_NULL) {
102        if (!len) /* Assume user is calling adler32(0, NULL, 0); */
103            cpu_check_features();
104        return 1L;
105    }
106#else
107    /* initial Adler-32 value (deferred check for len == 1 speed) */
108    if (buf == Z_NULL)
109        return 1L;
110#endif
111
112    /* in case short lengths are provided, keep it somewhat fast */
113    if (len < 16) {
114        while (len--) {
115            adler += *buf++;
116            sum2 += adler;
117        }
118        if (adler >= BASE)
119            adler -= BASE;
120        MOD28(sum2);            /* only added so many BASE's */
121        return adler | (sum2 << 16);
122    }
123
124    /* do length NMAX blocks -- requires just one modulo operation */
125    while (len >= NMAX) {
126        len -= NMAX;
127        n = NMAX / 16;          /* NMAX is divisible by 16 */
128        do {
129            DO16(buf);          /* 16 sums unrolled */
130            buf += 16;
131        } while (--n);
132        MOD(adler);
133        MOD(sum2);
134    }
135
136    /* do remaining bytes (less than NMAX, still just one modulo) */
137    if (len) {                  /* avoid modulos if none remaining */
138        while (len >= 16) {
139            len -= 16;
140            DO16(buf);
141            buf += 16;
142        }
143        while (len--) {
144            adler += *buf++;
145            sum2 += adler;
146        }
147        MOD(adler);
148        MOD(sum2);
149    }
150
151    /* return recombined sums */
152    return adler | (sum2 << 16);
153}
154
155/* ========================================================================= */
156uLong ZEXPORT adler32(uLong adler, const Bytef *buf, uInt len) {
157    return adler32_z(adler, buf, len);
158}
159
160/* ========================================================================= */
161local uLong adler32_combine_(uLong adler1, uLong adler2, z_off64_t len2) {
162    unsigned long sum1;
163    unsigned long sum2;
164    unsigned rem;
165
166    /* for negative len, return invalid adler32 as a clue for debugging */
167    if (len2 < 0)
168        return 0xffffffffUL;
169
170    /* the derivation of this formula is left as an exercise for the reader */
171    MOD63(len2);                /* assumes len2 >= 0 */
172    rem = (unsigned)len2;
173    sum1 = adler1 & 0xffff;
174    sum2 = rem * sum1;
175    MOD(sum2);
176    sum1 += (adler2 & 0xffff) + BASE - 1;
177    sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;
178    if (sum1 >= BASE) sum1 -= BASE;
179    if (sum1 >= BASE) sum1 -= BASE;
180    if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1);
181    if (sum2 >= BASE) sum2 -= BASE;
182    return sum1 | (sum2 << 16);
183}
184
185/* ========================================================================= */
186uLong ZEXPORT adler32_combine(uLong adler1, uLong adler2, z_off_t len2) {
187    return adler32_combine_(adler1, adler2, len2);
188}
189
190uLong ZEXPORT adler32_combine64(uLong adler1, uLong adler2, z_off64_t len2) {
191    return adler32_combine_(adler1, adler2, len2);
192}
193