1e1051a39Sopenharmony_ci/*
2e1051a39Sopenharmony_ci * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved.
3e1051a39Sopenharmony_ci *
4e1051a39Sopenharmony_ci * Licensed under the Apache License 2.0 (the "License").  You may not use
5e1051a39Sopenharmony_ci * this file except in compliance with the License.  You can obtain a copy
6e1051a39Sopenharmony_ci * in the file LICENSE in the source distribution or at
7e1051a39Sopenharmony_ci * https://www.openssl.org/source/license.html
8e1051a39Sopenharmony_ci */
9e1051a39Sopenharmony_ci
10e1051a39Sopenharmony_ci#include <string.h>
11e1051a39Sopenharmony_ci#include <openssl/crypto.h>
12e1051a39Sopenharmony_ci#include "internal/cryptlib.h"
13e1051a39Sopenharmony_ci#include "internal/endian.h"
14e1051a39Sopenharmony_ci#include "crypto/modes.h"
15e1051a39Sopenharmony_ci
/*
 * size_t_aX is size_t declared with a 1-byte alignment attribute when the
 * compiler defines __GNUC__ and STRICT_ALIGNMENT is not defined; otherwise
 * it is a plain alias of size_t.
 */
16e1051a39Sopenharmony_ci#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
17e1051a39Sopenharmony_citypedef size_t size_t_aX __attribute((__aligned__(1)));
18e1051a39Sopenharmony_ci#else
19e1051a39Sopenharmony_citypedef size_t size_t_aX;
20e1051a39Sopenharmony_ci#endif
21e1051a39Sopenharmony_ci
/*
 * When both BSWAP4 and STRICT_ALIGNMENT are defined, GETU32/PUTU32 are
 * replaced by a single 32-bit load/store through a u32 pointer combined
 * with BSWAP4.  Note that this PUTU32 expands to a bare assignment
 * expression, so it should only be used as a full statement.
 */
22e1051a39Sopenharmony_ci#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
23e1051a39Sopenharmony_ci/* redefine, because alignment is ensured */
24e1051a39Sopenharmony_ci# undef  GETU32
25e1051a39Sopenharmony_ci# define GETU32(p)       BSWAP4(*(const u32 *)(p))
26e1051a39Sopenharmony_ci# undef  PUTU32
27e1051a39Sopenharmony_ci# define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
28e1051a39Sopenharmony_ci#endif
29e1051a39Sopenharmony_ci
/*
 * PACK(s): convert s to size_t and shift it left by (bits in size_t - 16),
 * i.e. a 16-bit constant ends up in the 16 most significant bits.
 */
30e1051a39Sopenharmony_ci#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
/*
 * REDUCE1BIT(V): shift the 128-bit value V (fields .hi and .lo) right by
 * one bit and, when the bit shifted out of V.lo was set, XOR 0xe1 into the
 * most significant byte of V.hi.  Both branches produce the same value;
 * the sizeof(size_t) test only selects whether the mask is built as a u64
 * or as a u32 that is widened afterwards.  V is expanded several times and
 * is not parenthesized, so pass a plain lvalue without side effects.
 */
31e1051a39Sopenharmony_ci#define REDUCE1BIT(V)   do { \
32e1051a39Sopenharmony_ci        if (sizeof(size_t)==8) { \
33e1051a39Sopenharmony_ci                u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
34e1051a39Sopenharmony_ci                V.lo  = (V.hi<<63)|(V.lo>>1); \
35e1051a39Sopenharmony_ci                V.hi  = (V.hi>>1 )^T; \
36e1051a39Sopenharmony_ci        } \
37e1051a39Sopenharmony_ci        else { \
38e1051a39Sopenharmony_ci                u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
39e1051a39Sopenharmony_ci                V.lo  = (V.hi<<63)|(V.lo>>1); \
40e1051a39Sopenharmony_ci                V.hi  = (V.hi>>1 )^((u64)T<<32); \
41e1051a39Sopenharmony_ci        } \
42e1051a39Sopenharmony_ci} while(0)
43e1051a39Sopenharmony_ci
44e1051a39Sopenharmony_ci/*-
45e1051a39Sopenharmony_ci * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
46e1051a39Sopenharmony_ci * never be set to 8. 8 is effectively reserved for testing purposes.
47e1051a39Sopenharmony_ci * TABLE_BITS>1 are lookup-table-driven implementations referred to as
48e1051a39Sopenharmony_ci * "Shoup's" in GCM specification. In other words OpenSSL does not cover
49e1051a39Sopenharmony_ci * whole spectrum of possible table driven implementations. Why? In
50e1051a39Sopenharmony_ci * non-"Shoup's" case memory access pattern is segmented in such manner,
51e1051a39Sopenharmony_ci * that it's trivial to see that cache timing information can reveal
52e1051a39Sopenharmony_ci * fair portion of intermediate hash value. Given that ciphertext is
53e1051a39Sopenharmony_ci * always available to attacker, it's possible for him to attempt to
54e1051a39Sopenharmony_ci * deduce secret parameter H and if successful, tamper with messages
55e1051a39Sopenharmony_ci * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
56e1051a39Sopenharmony_ci * not as trivial, but there is no reason to believe that it's resistant
57e1051a39Sopenharmony_ci * to cache-timing attack. And the thing about "8-bit" implementation is
58e1051a39Sopenharmony_ci * that it consumes 16 (sixteen) times more memory, 4KB per individual
59e1051a39Sopenharmony_ci * key + 1KB shared. Well, on pros side it should be twice as fast as
60e1051a39Sopenharmony_ci * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
61e1051a39Sopenharmony_ci * was observed to run ~75% faster, closer to 100% for commercial
62e1051a39Sopenharmony_ci * compilers... Yet "4-bit" procedure is preferred, because it's
63e1051a39Sopenharmony_ci * believed to provide better security-performance balance and adequate
64e1051a39Sopenharmony_ci * all-round performance. "All-round" refers to things like:
65e1051a39Sopenharmony_ci *
66e1051a39Sopenharmony_ci * - shorter setup time effectively improves overall timing for
67e1051a39Sopenharmony_ci *   handling short messages;
68e1051a39Sopenharmony_ci * - larger table allocation can become unbearable because of VM
69e1051a39Sopenharmony_ci *   subsystem penalties (for example on Windows large enough free
70e1051a39Sopenharmony_ci *   results in VM working set trimming, meaning that consequent
71e1051a39Sopenharmony_ci *   malloc would immediately incur working set expansion);
72e1051a39Sopenharmony_ci * - larger table has larger cache footprint, which can affect
73e1051a39Sopenharmony_ci *   performance of other code paths (not necessarily even from same
74e1051a39Sopenharmony_ci *   thread in Hyper-Threading world);
75e1051a39Sopenharmony_ci *
76e1051a39Sopenharmony_ci * Value of 1 is not appropriate for performance reasons.
77e1051a39Sopenharmony_ci */
78e1051a39Sopenharmony_ci#if     TABLE_BITS==8
79e1051a39Sopenharmony_ci
80e1051a39Sopenharmony_cistatic void gcm_init_8bit(u128 Htable[256], u64 H[2])
81e1051a39Sopenharmony_ci{
82e1051a39Sopenharmony_ci    int i, j;
83e1051a39Sopenharmony_ci    u128 V;
84e1051a39Sopenharmony_ci
85e1051a39Sopenharmony_ci    Htable[0].hi = 0;
86e1051a39Sopenharmony_ci    Htable[0].lo = 0;
87e1051a39Sopenharmony_ci    V.hi = H[0];
88e1051a39Sopenharmony_ci    V.lo = H[1];
89e1051a39Sopenharmony_ci
90e1051a39Sopenharmony_ci    for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
91e1051a39Sopenharmony_ci        REDUCE1BIT(V);
92e1051a39Sopenharmony_ci        Htable[i] = V;
93e1051a39Sopenharmony_ci    }
94e1051a39Sopenharmony_ci
95e1051a39Sopenharmony_ci    for (i = 2; i < 256; i <<= 1) {
96e1051a39Sopenharmony_ci        u128 *Hi = Htable + i, H0 = *Hi;
97e1051a39Sopenharmony_ci        for (j = 1; j < i; ++j) {
98e1051a39Sopenharmony_ci            Hi[j].hi = H0.hi ^ Htable[j].hi;
99e1051a39Sopenharmony_ci            Hi[j].lo = H0.lo ^ Htable[j].lo;
100e1051a39Sopenharmony_ci        }
101e1051a39Sopenharmony_ci    }
102e1051a39Sopenharmony_ci}
103e1051a39Sopenharmony_ci
104e1051a39Sopenharmony_cistatic void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
105e1051a39Sopenharmony_ci{
106e1051a39Sopenharmony_ci    u128 Z = { 0, 0 };
107e1051a39Sopenharmony_ci    const u8 *xi = (const u8 *)Xi + 15;
108e1051a39Sopenharmony_ci    size_t rem, n = *xi;
109e1051a39Sopenharmony_ci    DECLARE_IS_ENDIAN;
110e1051a39Sopenharmony_ci    static const size_t rem_8bit[256] = {
111e1051a39Sopenharmony_ci        PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
112e1051a39Sopenharmony_ci        PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
113e1051a39Sopenharmony_ci        PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
114e1051a39Sopenharmony_ci        PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
115e1051a39Sopenharmony_ci        PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
116e1051a39Sopenharmony_ci        PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
117e1051a39Sopenharmony_ci        PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
118e1051a39Sopenharmony_ci        PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
119e1051a39Sopenharmony_ci        PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
120e1051a39Sopenharmony_ci        PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
121e1051a39Sopenharmony_ci        PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
122e1051a39Sopenharmony_ci        PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
123e1051a39Sopenharmony_ci        PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
124e1051a39Sopenharmony_ci        PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
125e1051a39Sopenharmony_ci        PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
126e1051a39Sopenharmony_ci        PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
127e1051a39Sopenharmony_ci        PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
128e1051a39Sopenharmony_ci        PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
129e1051a39Sopenharmony_ci        PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
130e1051a39Sopenharmony_ci        PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
131e1051a39Sopenharmony_ci        PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
132e1051a39Sopenharmony_ci        PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
133e1051a39Sopenharmony_ci        PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
134e1051a39Sopenharmony_ci        PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
135e1051a39Sopenharmony_ci        PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
136e1051a39Sopenharmony_ci        PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
137e1051a39Sopenharmony_ci        PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
138e1051a39Sopenharmony_ci        PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
139e1051a39Sopenharmony_ci        PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
140e1051a39Sopenharmony_ci        PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
141e1051a39Sopenharmony_ci        PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
142e1051a39Sopenharmony_ci        PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
143e1051a39Sopenharmony_ci        PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
144e1051a39Sopenharmony_ci        PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
145e1051a39Sopenharmony_ci        PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
146e1051a39Sopenharmony_ci        PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
147e1051a39Sopenharmony_ci        PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
148e1051a39Sopenharmony_ci        PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
149e1051a39Sopenharmony_ci        PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
150e1051a39Sopenharmony_ci        PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
151e1051a39Sopenharmony_ci        PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
152e1051a39Sopenharmony_ci        PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
153e1051a39Sopenharmony_ci        PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
154e1051a39Sopenharmony_ci        PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
155e1051a39Sopenharmony_ci        PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
156e1051a39Sopenharmony_ci        PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
157e1051a39Sopenharmony_ci        PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
158e1051a39Sopenharmony_ci        PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
159e1051a39Sopenharmony_ci        PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
160e1051a39Sopenharmony_ci        PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
161e1051a39Sopenharmony_ci        PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
162e1051a39Sopenharmony_ci        PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
163e1051a39Sopenharmony_ci        PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
164e1051a39Sopenharmony_ci        PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
165e1051a39Sopenharmony_ci        PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
166e1051a39Sopenharmony_ci        PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
167e1051a39Sopenharmony_ci        PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
168e1051a39Sopenharmony_ci        PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
169e1051a39Sopenharmony_ci        PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
170e1051a39Sopenharmony_ci        PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
171e1051a39Sopenharmony_ci        PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
172e1051a39Sopenharmony_ci        PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
173e1051a39Sopenharmony_ci        PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
174e1051a39Sopenharmony_ci        PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
175e1051a39Sopenharmony_ci    };
176e1051a39Sopenharmony_ci
177e1051a39Sopenharmony_ci    while (1) {
178e1051a39Sopenharmony_ci        Z.hi ^= Htable[n].hi;
179e1051a39Sopenharmony_ci        Z.lo ^= Htable[n].lo;
180e1051a39Sopenharmony_ci
181e1051a39Sopenharmony_ci        if ((u8 *)Xi == xi)
182e1051a39Sopenharmony_ci            break;
183e1051a39Sopenharmony_ci
184e1051a39Sopenharmony_ci        n = *(--xi);
185e1051a39Sopenharmony_ci
186e1051a39Sopenharmony_ci        rem = (size_t)Z.lo & 0xff;
187e1051a39Sopenharmony_ci        Z.lo = (Z.hi << 56) | (Z.lo >> 8);
188e1051a39Sopenharmony_ci        Z.hi = (Z.hi >> 8);
189e1051a39Sopenharmony_ci        if (sizeof(size_t) == 8)
190e1051a39Sopenharmony_ci            Z.hi ^= rem_8bit[rem];
191e1051a39Sopenharmony_ci        else
192e1051a39Sopenharmony_ci            Z.hi ^= (u64)rem_8bit[rem] << 32;
193e1051a39Sopenharmony_ci    }
194e1051a39Sopenharmony_ci
195e1051a39Sopenharmony_ci    if (IS_LITTLE_ENDIAN) {
196e1051a39Sopenharmony_ci# ifdef BSWAP8
197e1051a39Sopenharmony_ci        Xi[0] = BSWAP8(Z.hi);
198e1051a39Sopenharmony_ci        Xi[1] = BSWAP8(Z.lo);
199e1051a39Sopenharmony_ci# else
200e1051a39Sopenharmony_ci        u8 *p = (u8 *)Xi;
201e1051a39Sopenharmony_ci        u32 v;
202e1051a39Sopenharmony_ci        v = (u32)(Z.hi >> 32);
203e1051a39Sopenharmony_ci        PUTU32(p, v);
204e1051a39Sopenharmony_ci        v = (u32)(Z.hi);
205e1051a39Sopenharmony_ci        PUTU32(p + 4, v);
206e1051a39Sopenharmony_ci        v = (u32)(Z.lo >> 32);
207e1051a39Sopenharmony_ci        PUTU32(p + 8, v);
208e1051a39Sopenharmony_ci        v = (u32)(Z.lo);
209e1051a39Sopenharmony_ci        PUTU32(p + 12, v);
210e1051a39Sopenharmony_ci# endif
211e1051a39Sopenharmony_ci    } else {
212e1051a39Sopenharmony_ci        Xi[0] = Z.hi;
213e1051a39Sopenharmony_ci        Xi[1] = Z.lo;
214e1051a39Sopenharmony_ci    }
215e1051a39Sopenharmony_ci}
216e1051a39Sopenharmony_ci
/*
 * GCM_MUL(ctx): run gcm_gmult_8bit() on ctx->Xi.u using ctx->Htable.
 * ctx is not parenthesized in the expansion, so pass a simple pointer name.
 */
217e1051a39Sopenharmony_ci# define GCM_MUL(ctx)      gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
218e1051a39Sopenharmony_ci
219e1051a39Sopenharmony_ci#elif   TABLE_BITS==4
220e1051a39Sopenharmony_ci
221e1051a39Sopenharmony_cistatic void gcm_init_4bit(u128 Htable[16], u64 H[2])
222e1051a39Sopenharmony_ci{
223e1051a39Sopenharmony_ci    u128 V;
224e1051a39Sopenharmony_ci# if defined(OPENSSL_SMALL_FOOTPRINT)
225e1051a39Sopenharmony_ci    int i;
226e1051a39Sopenharmony_ci# endif
227e1051a39Sopenharmony_ci
228e1051a39Sopenharmony_ci    Htable[0].hi = 0;
229e1051a39Sopenharmony_ci    Htable[0].lo = 0;
230e1051a39Sopenharmony_ci    V.hi = H[0];
231e1051a39Sopenharmony_ci    V.lo = H[1];
232e1051a39Sopenharmony_ci
233e1051a39Sopenharmony_ci# if defined(OPENSSL_SMALL_FOOTPRINT)
234e1051a39Sopenharmony_ci    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
235e1051a39Sopenharmony_ci        REDUCE1BIT(V);
236e1051a39Sopenharmony_ci        Htable[i] = V;
237e1051a39Sopenharmony_ci    }
238e1051a39Sopenharmony_ci
239e1051a39Sopenharmony_ci    for (i = 2; i < 16; i <<= 1) {
240e1051a39Sopenharmony_ci        u128 *Hi = Htable + i;
241e1051a39Sopenharmony_ci        int j;
242e1051a39Sopenharmony_ci        for (V = *Hi, j = 1; j < i; ++j) {
243e1051a39Sopenharmony_ci            Hi[j].hi = V.hi ^ Htable[j].hi;
244e1051a39Sopenharmony_ci            Hi[j].lo = V.lo ^ Htable[j].lo;
245e1051a39Sopenharmony_ci        }
246e1051a39Sopenharmony_ci    }
247e1051a39Sopenharmony_ci# else
248e1051a39Sopenharmony_ci    Htable[8] = V;
249e1051a39Sopenharmony_ci    REDUCE1BIT(V);
250e1051a39Sopenharmony_ci    Htable[4] = V;
251e1051a39Sopenharmony_ci    REDUCE1BIT(V);
252e1051a39Sopenharmony_ci    Htable[2] = V;
253e1051a39Sopenharmony_ci    REDUCE1BIT(V);
254e1051a39Sopenharmony_ci    Htable[1] = V;
255e1051a39Sopenharmony_ci    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
256e1051a39Sopenharmony_ci    V = Htable[4];
257e1051a39Sopenharmony_ci    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
258e1051a39Sopenharmony_ci    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
259e1051a39Sopenharmony_ci    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
260e1051a39Sopenharmony_ci    V = Htable[8];
261e1051a39Sopenharmony_ci    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
262e1051a39Sopenharmony_ci    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
263e1051a39Sopenharmony_ci    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
264e1051a39Sopenharmony_ci    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
265e1051a39Sopenharmony_ci    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
266e1051a39Sopenharmony_ci    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
267e1051a39Sopenharmony_ci    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
268e1051a39Sopenharmony_ci# endif
269e1051a39Sopenharmony_ci# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
270e1051a39Sopenharmony_ci    /*
271e1051a39Sopenharmony_ci     * ARM assembler expects specific dword order in Htable.
272e1051a39Sopenharmony_ci     */
273e1051a39Sopenharmony_ci    {
274e1051a39Sopenharmony_ci        int j;
275e1051a39Sopenharmony_ci        DECLARE_IS_ENDIAN;
276e1051a39Sopenharmony_ci
277e1051a39Sopenharmony_ci        if (IS_LITTLE_ENDIAN)
278e1051a39Sopenharmony_ci            for (j = 0; j < 16; ++j) {
279e1051a39Sopenharmony_ci                V = Htable[j];
280e1051a39Sopenharmony_ci                Htable[j].hi = V.lo;
281e1051a39Sopenharmony_ci                Htable[j].lo = V.hi;
282e1051a39Sopenharmony_ci        } else
283e1051a39Sopenharmony_ci            for (j = 0; j < 16; ++j) {
284e1051a39Sopenharmony_ci                V = Htable[j];
285e1051a39Sopenharmony_ci                Htable[j].hi = V.lo << 32 | V.lo >> 32;
286e1051a39Sopenharmony_ci                Htable[j].lo = V.hi << 32 | V.hi >> 32;
287e1051a39Sopenharmony_ci            }
288e1051a39Sopenharmony_ci    }
289e1051a39Sopenharmony_ci# endif
290e1051a39Sopenharmony_ci}
291e1051a39Sopenharmony_ci
292e1051a39Sopenharmony_ci# ifndef GHASH_ASM
293e1051a39Sopenharmony_cistatic const size_t rem_4bit[16] = {
294e1051a39Sopenharmony_ci    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
295e1051a39Sopenharmony_ci    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
296e1051a39Sopenharmony_ci    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
297e1051a39Sopenharmony_ci    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
298e1051a39Sopenharmony_ci};
299e1051a39Sopenharmony_ci
300e1051a39Sopenharmony_cistatic void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
301e1051a39Sopenharmony_ci{
302e1051a39Sopenharmony_ci    u128 Z;
303e1051a39Sopenharmony_ci    int cnt = 15;
304e1051a39Sopenharmony_ci    size_t rem, nlo, nhi;
305e1051a39Sopenharmony_ci    DECLARE_IS_ENDIAN;
306e1051a39Sopenharmony_ci
307e1051a39Sopenharmony_ci    nlo = ((const u8 *)Xi)[15];
308e1051a39Sopenharmony_ci    nhi = nlo >> 4;
309e1051a39Sopenharmony_ci    nlo &= 0xf;
310e1051a39Sopenharmony_ci
311e1051a39Sopenharmony_ci    Z.hi = Htable[nlo].hi;
312e1051a39Sopenharmony_ci    Z.lo = Htable[nlo].lo;
313e1051a39Sopenharmony_ci
314e1051a39Sopenharmony_ci    while (1) {
315e1051a39Sopenharmony_ci        rem = (size_t)Z.lo & 0xf;
316e1051a39Sopenharmony_ci        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
317e1051a39Sopenharmony_ci        Z.hi = (Z.hi >> 4);
318e1051a39Sopenharmony_ci        if (sizeof(size_t) == 8)
319e1051a39Sopenharmony_ci            Z.hi ^= rem_4bit[rem];
320e1051a39Sopenharmony_ci        else
321e1051a39Sopenharmony_ci            Z.hi ^= (u64)rem_4bit[rem] << 32;
322e1051a39Sopenharmony_ci
323e1051a39Sopenharmony_ci        Z.hi ^= Htable[nhi].hi;
324e1051a39Sopenharmony_ci        Z.lo ^= Htable[nhi].lo;
325e1051a39Sopenharmony_ci
326e1051a39Sopenharmony_ci        if (--cnt < 0)
327e1051a39Sopenharmony_ci            break;
328e1051a39Sopenharmony_ci
329e1051a39Sopenharmony_ci        nlo = ((const u8 *)Xi)[cnt];
330e1051a39Sopenharmony_ci        nhi = nlo >> 4;
331e1051a39Sopenharmony_ci        nlo &= 0xf;
332e1051a39Sopenharmony_ci
333e1051a39Sopenharmony_ci        rem = (size_t)Z.lo & 0xf;
334e1051a39Sopenharmony_ci        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
335e1051a39Sopenharmony_ci        Z.hi = (Z.hi >> 4);
336e1051a39Sopenharmony_ci        if (sizeof(size_t) == 8)
337e1051a39Sopenharmony_ci            Z.hi ^= rem_4bit[rem];
338e1051a39Sopenharmony_ci        else
339e1051a39Sopenharmony_ci            Z.hi ^= (u64)rem_4bit[rem] << 32;
340e1051a39Sopenharmony_ci
341e1051a39Sopenharmony_ci        Z.hi ^= Htable[nlo].hi;
342e1051a39Sopenharmony_ci        Z.lo ^= Htable[nlo].lo;
343e1051a39Sopenharmony_ci    }
344e1051a39Sopenharmony_ci
345e1051a39Sopenharmony_ci    if (IS_LITTLE_ENDIAN) {
346e1051a39Sopenharmony_ci#  ifdef BSWAP8
347e1051a39Sopenharmony_ci        Xi[0] = BSWAP8(Z.hi);
348e1051a39Sopenharmony_ci        Xi[1] = BSWAP8(Z.lo);
349e1051a39Sopenharmony_ci#  else
350e1051a39Sopenharmony_ci        u8 *p = (u8 *)Xi;
351e1051a39Sopenharmony_ci        u32 v;
352e1051a39Sopenharmony_ci        v = (u32)(Z.hi >> 32);
353e1051a39Sopenharmony_ci        PUTU32(p, v);
354e1051a39Sopenharmony_ci        v = (u32)(Z.hi);
355e1051a39Sopenharmony_ci        PUTU32(p + 4, v);
356e1051a39Sopenharmony_ci        v = (u32)(Z.lo >> 32);
357e1051a39Sopenharmony_ci        PUTU32(p + 8, v);
358e1051a39Sopenharmony_ci        v = (u32)(Z.lo);
359e1051a39Sopenharmony_ci        PUTU32(p + 12, v);
360e1051a39Sopenharmony_ci#  endif
361e1051a39Sopenharmony_ci    } else {
362e1051a39Sopenharmony_ci        Xi[0] = Z.hi;
363e1051a39Sopenharmony_ci        Xi[1] = Z.lo;
364e1051a39Sopenharmony_ci    }
365e1051a39Sopenharmony_ci}
366e1051a39Sopenharmony_ci
367e1051a39Sopenharmony_ci#  if !defined(OPENSSL_SMALL_FOOTPRINT)
368e1051a39Sopenharmony_ci/*
369e1051a39Sopenharmony_ci * Streamed gcm_gmult_4bit, see CRYPTO_gcm128_[en|de]crypt for
370e1051a39Sopenharmony_ci * details... Compiler-generated code doesn't seem to give any
371e1051a39Sopenharmony_ci * performance improvement, at least not on x86[_64]. It's here
372e1051a39Sopenharmony_ci * mostly as reference and a placeholder for possible future
373e1051a39Sopenharmony_ci * non-trivial optimization[s]...
374e1051a39Sopenharmony_ci */
/*
 * Fold len bytes from inp into the 16-byte accumulator Xi, one 16-byte
 * block per outer-loop iteration.  For every block each byte of Xi is
 * XORed with the matching input byte and the result is run through the
 * same nibble-at-a-time table procedure as gcm_gmult_4bit(); the outcome
 * is stored back into Xi before the next block is read.
 *
 * Xi     - accumulator, read and overwritten for every block.
 * Htable - 16-entry table, only read.
 * inp    - input bytes; 16 bytes are read per iteration.
 * len    - byte count.  The do/while body runs once before len is tested
 *          and the loop stops only when len -= 16 yields exactly zero, so
 *          len has to be a non-zero multiple of 16.
 */
375e1051a39Sopenharmony_cistatic void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
376e1051a39Sopenharmony_ci                           const u8 *inp, size_t len)
377e1051a39Sopenharmony_ci{
378e1051a39Sopenharmony_ci    u128 Z;
379e1051a39Sopenharmony_ci    int cnt;
380e1051a39Sopenharmony_ci    size_t rem, nlo, nhi;
381e1051a39Sopenharmony_ci    DECLARE_IS_ENDIAN;
382e1051a39Sopenharmony_ci
/* The "#if 1" branch below is the one that is compiled. */
383e1051a39Sopenharmony_ci#   if 1
384e1051a39Sopenharmony_ci    do {
        /* Start with the last byte of (Xi ^ block), low nibble first. */
385e1051a39Sopenharmony_ci        cnt = 15;
386e1051a39Sopenharmony_ci        nlo = ((const u8 *)Xi)[15];
387e1051a39Sopenharmony_ci        nlo ^= inp[15];
388e1051a39Sopenharmony_ci        nhi = nlo >> 4;
389e1051a39Sopenharmony_ci        nlo &= 0xf;
390e1051a39Sopenharmony_ci
391e1051a39Sopenharmony_ci        Z.hi = Htable[nlo].hi;
392e1051a39Sopenharmony_ci        Z.lo = Htable[nlo].lo;
393e1051a39Sopenharmony_ci
394e1051a39Sopenharmony_ci        while (1) {
            /* Shift Z right by 4 bits, fold the dropped nibble back in. */
395e1051a39Sopenharmony_ci            rem = (size_t)Z.lo & 0xf;
396e1051a39Sopenharmony_ci            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
397e1051a39Sopenharmony_ci            Z.hi = (Z.hi >> 4);
398e1051a39Sopenharmony_ci            if (sizeof(size_t) == 8)
399e1051a39Sopenharmony_ci                Z.hi ^= rem_4bit[rem];
400e1051a39Sopenharmony_ci            else
401e1051a39Sopenharmony_ci                Z.hi ^= (u64)rem_4bit[rem] << 32;
402e1051a39Sopenharmony_ci
            /* High nibble of the current byte. */
403e1051a39Sopenharmony_ci            Z.hi ^= Htable[nhi].hi;
404e1051a39Sopenharmony_ci            Z.lo ^= Htable[nhi].lo;
405e1051a39Sopenharmony_ci
            /* Byte 0 has just been finished once cnt drops below zero. */
406e1051a39Sopenharmony_ci            if (--cnt < 0)
407e1051a39Sopenharmony_ci                break;
408e1051a39Sopenharmony_ci
409e1051a39Sopenharmony_ci            nlo = ((const u8 *)Xi)[cnt];
410e1051a39Sopenharmony_ci            nlo ^= inp[cnt];
411e1051a39Sopenharmony_ci            nhi = nlo >> 4;
412e1051a39Sopenharmony_ci            nlo &= 0xf;
413e1051a39Sopenharmony_ci
414e1051a39Sopenharmony_ci            rem = (size_t)Z.lo & 0xf;
415e1051a39Sopenharmony_ci            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
416e1051a39Sopenharmony_ci            Z.hi = (Z.hi >> 4);
417e1051a39Sopenharmony_ci            if (sizeof(size_t) == 8)
418e1051a39Sopenharmony_ci                Z.hi ^= rem_4bit[rem];
419e1051a39Sopenharmony_ci            else
420e1051a39Sopenharmony_ci                Z.hi ^= (u64)rem_4bit[rem] << 32;
421e1051a39Sopenharmony_ci
            /* Low nibble of the next byte. */
422e1051a39Sopenharmony_ci            Z.hi ^= Htable[nlo].hi;
423e1051a39Sopenharmony_ci            Z.lo ^= Htable[nlo].lo;
424e1051a39Sopenharmony_ci        }
/*
 * The "#else" branch is currently disabled by the "#if 1" above.  It opens
 * the same do-loop that is closed after the "#endif".
 */
425e1051a39Sopenharmony_ci#   else
426e1051a39Sopenharmony_ci    /*
427e1051a39Sopenharmony_ci     * Extra 256+16 bytes per-key plus 512 bytes shared tables
428e1051a39Sopenharmony_ci     * [should] give ~50% improvement... One could have PACK()-ed
429e1051a39Sopenharmony_ci     * the rem_8bit even here, but the priority is to minimize
430e1051a39Sopenharmony_ci     * cache footprint...
431e1051a39Sopenharmony_ci     */
432e1051a39Sopenharmony_ci    u128 Hshr4[16];             /* Htable shifted right by 4 bits */
433e1051a39Sopenharmony_ci    u8 Hshl4[16];               /* Htable shifted left by 4 bits */
434e1051a39Sopenharmony_ci    static const unsigned short rem_8bit[256] = {
435e1051a39Sopenharmony_ci        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
436e1051a39Sopenharmony_ci        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
437e1051a39Sopenharmony_ci        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
438e1051a39Sopenharmony_ci        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
439e1051a39Sopenharmony_ci        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
440e1051a39Sopenharmony_ci        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
441e1051a39Sopenharmony_ci        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
442e1051a39Sopenharmony_ci        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
443e1051a39Sopenharmony_ci        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
444e1051a39Sopenharmony_ci        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
445e1051a39Sopenharmony_ci        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
446e1051a39Sopenharmony_ci        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
447e1051a39Sopenharmony_ci        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
448e1051a39Sopenharmony_ci        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
449e1051a39Sopenharmony_ci        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
450e1051a39Sopenharmony_ci        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
451e1051a39Sopenharmony_ci        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
452e1051a39Sopenharmony_ci        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
453e1051a39Sopenharmony_ci        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
454e1051a39Sopenharmony_ci        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
455e1051a39Sopenharmony_ci        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
456e1051a39Sopenharmony_ci        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
457e1051a39Sopenharmony_ci        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
458e1051a39Sopenharmony_ci        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
459e1051a39Sopenharmony_ci        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
460e1051a39Sopenharmony_ci        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
461e1051a39Sopenharmony_ci        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
462e1051a39Sopenharmony_ci        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
463e1051a39Sopenharmony_ci        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
464e1051a39Sopenharmony_ci        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
465e1051a39Sopenharmony_ci        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
466e1051a39Sopenharmony_ci        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
467e1051a39Sopenharmony_ci    };
468e1051a39Sopenharmony_ci    /*
469e1051a39Sopenharmony_ci     * This pre-processing phase slows down procedure by approximately
470e1051a39Sopenharmony_ci     * same time as it makes each loop spin faster. In other words
471e1051a39Sopenharmony_ci     * single block performance is approximately same as straightforward
472e1051a39Sopenharmony_ci     * "4-bit" implementation, and then it goes only faster...
473e1051a39Sopenharmony_ci     */
474e1051a39Sopenharmony_ci    for (cnt = 0; cnt < 16; ++cnt) {
475e1051a39Sopenharmony_ci        Z.hi = Htable[cnt].hi;
476e1051a39Sopenharmony_ci        Z.lo = Htable[cnt].lo;
477e1051a39Sopenharmony_ci        Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
478e1051a39Sopenharmony_ci        Hshr4[cnt].hi = (Z.hi >> 4);
479e1051a39Sopenharmony_ci        Hshl4[cnt] = (u8)(Z.lo << 4);
480e1051a39Sopenharmony_ci    }
481e1051a39Sopenharmony_ci
482e1051a39Sopenharmony_ci    do {
        /* Bytes 15..1: one whole byte (8-bit shift) per iteration. */
483e1051a39Sopenharmony_ci        for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
484e1051a39Sopenharmony_ci            nlo = ((const u8 *)Xi)[cnt];
485e1051a39Sopenharmony_ci            nlo ^= inp[cnt];
486e1051a39Sopenharmony_ci            nhi = nlo >> 4;
487e1051a39Sopenharmony_ci            nlo &= 0xf;
488e1051a39Sopenharmony_ci
489e1051a39Sopenharmony_ci            Z.hi ^= Htable[nlo].hi;
490e1051a39Sopenharmony_ci            Z.lo ^= Htable[nlo].lo;
491e1051a39Sopenharmony_ci
492e1051a39Sopenharmony_ci            rem = (size_t)Z.lo & 0xff;
493e1051a39Sopenharmony_ci
494e1051a39Sopenharmony_ci            Z.lo = (Z.hi << 56) | (Z.lo >> 8);
495e1051a39Sopenharmony_ci            Z.hi = (Z.hi >> 8);
496e1051a39Sopenharmony_ci
497e1051a39Sopenharmony_ci            Z.hi ^= Hshr4[nhi].hi;
498e1051a39Sopenharmony_ci            Z.lo ^= Hshr4[nhi].lo;
499e1051a39Sopenharmony_ci            Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
500e1051a39Sopenharmony_ci        }
501e1051a39Sopenharmony_ci
        /* Byte 0 is handled separately with a final 4-bit shift. */
502e1051a39Sopenharmony_ci        nlo = ((const u8 *)Xi)[0];
503e1051a39Sopenharmony_ci        nlo ^= inp[0];
504e1051a39Sopenharmony_ci        nhi = nlo >> 4;
505e1051a39Sopenharmony_ci        nlo &= 0xf;
506e1051a39Sopenharmony_ci
507e1051a39Sopenharmony_ci        Z.hi ^= Htable[nlo].hi;
508e1051a39Sopenharmony_ci        Z.lo ^= Htable[nlo].lo;
509e1051a39Sopenharmony_ci
510e1051a39Sopenharmony_ci        rem = (size_t)Z.lo & 0xf;
511e1051a39Sopenharmony_ci
512e1051a39Sopenharmony_ci        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
513e1051a39Sopenharmony_ci        Z.hi = (Z.hi >> 4);
514e1051a39Sopenharmony_ci
515e1051a39Sopenharmony_ci        Z.hi ^= Htable[nhi].hi;
516e1051a39Sopenharmony_ci        Z.lo ^= Htable[nhi].lo;
517e1051a39Sopenharmony_ci        Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
518e1051a39Sopenharmony_ci#   endif
519e1051a39Sopenharmony_ci
        /*
         * Store Z back into Xi, high half first.  Little-endian hosts go
         * through BSWAP8 or four PUTU32 stores; otherwise the two halves
         * are assigned directly.
         */
520e1051a39Sopenharmony_ci        if (IS_LITTLE_ENDIAN) {
521e1051a39Sopenharmony_ci#   ifdef BSWAP8
522e1051a39Sopenharmony_ci            Xi[0] = BSWAP8(Z.hi);
523e1051a39Sopenharmony_ci            Xi[1] = BSWAP8(Z.lo);
524e1051a39Sopenharmony_ci#   else
525e1051a39Sopenharmony_ci            u8 *p = (u8 *)Xi;
526e1051a39Sopenharmony_ci            u32 v;
527e1051a39Sopenharmony_ci            v = (u32)(Z.hi >> 32);
528e1051a39Sopenharmony_ci            PUTU32(p, v);
529e1051a39Sopenharmony_ci            v = (u32)(Z.hi);
530e1051a39Sopenharmony_ci            PUTU32(p + 4, v);
531e1051a39Sopenharmony_ci            v = (u32)(Z.lo >> 32);
532e1051a39Sopenharmony_ci            PUTU32(p + 8, v);
533e1051a39Sopenharmony_ci            v = (u32)(Z.lo);
534e1051a39Sopenharmony_ci            PUTU32(p + 12, v);
535e1051a39Sopenharmony_ci#   endif
536e1051a39Sopenharmony_ci        } else {
537e1051a39Sopenharmony_ci            Xi[0] = Z.hi;
538e1051a39Sopenharmony_ci            Xi[1] = Z.lo;
539e1051a39Sopenharmony_ci        }
    /* Advance to the next block; the loop ends when len reaches zero. */
540e1051a39Sopenharmony_ci    } while (inp += 16, len -= 16);
541e1051a39Sopenharmony_ci}
542e1051a39Sopenharmony_ci#  endif
543e1051a39Sopenharmony_ci# else
/*
 * Declarations only: no C bodies for the 4-bit table routines are compiled
 * in this preprocessor branch, so both symbols must be resolved at link time
 * (presumably by a platform assembly module - confirm against the enclosing
 * #if condition).
 */
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
547e1051a39Sopenharmony_ci# endif
548e1051a39Sopenharmony_ci
/*
 * GCM_MUL applies gcm_gmult_4bit to the hash state ctx->Xi using the
 * precomputed ctx->Htable. The argument is parenthesized so that any
 * pointer-valued expression can be passed safely.
 */
# define GCM_MUL(ctx)      gcm_gmult_4bit((ctx)->Xi.u,(ctx)->Htable)
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
/* GHASH feeds len bytes starting at in to gcm_ghash_4bit in one call. */
#  define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
/*
 * GHASH_CHUNK is a "stride parameter" meant to mitigate the cache thrashing
 * effect. In other words, the idea is to hash data while it is still in the
 * L1 cache after the encryption pass...
 */
#  define GHASH_CHUNK       (3*1024)
# endif
559e1051a39Sopenharmony_ci
560e1051a39Sopenharmony_ci#else                           /* TABLE_BITS */
561e1051a39Sopenharmony_ci
562e1051a39Sopenharmony_cistatic void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
563e1051a39Sopenharmony_ci{
564e1051a39Sopenharmony_ci    u128 V, Z = { 0, 0 };
565e1051a39Sopenharmony_ci    long X;
566e1051a39Sopenharmony_ci    int i, j;
567e1051a39Sopenharmony_ci    const long *xi = (const long *)Xi;
568e1051a39Sopenharmony_ci    DECLARE_IS_ENDIAN;
569e1051a39Sopenharmony_ci
570e1051a39Sopenharmony_ci    V.hi = H[0];                /* H is in host byte order, no byte swapping */
571e1051a39Sopenharmony_ci    V.lo = H[1];
572e1051a39Sopenharmony_ci
573e1051a39Sopenharmony_ci    for (j = 0; j < 16 / sizeof(long); ++j) {
574e1051a39Sopenharmony_ci        if (IS_LITTLE_ENDIAN) {
575e1051a39Sopenharmony_ci            if (sizeof(long) == 8) {
576e1051a39Sopenharmony_ci# ifdef BSWAP8
577e1051a39Sopenharmony_ci                X = (long)(BSWAP8(xi[j]));
578e1051a39Sopenharmony_ci# else
579e1051a39Sopenharmony_ci                const u8 *p = (const u8 *)(xi + j);
580e1051a39Sopenharmony_ci                X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
581e1051a39Sopenharmony_ci# endif
582e1051a39Sopenharmony_ci            } else {
583e1051a39Sopenharmony_ci                const u8 *p = (const u8 *)(xi + j);
584e1051a39Sopenharmony_ci                X = (long)GETU32(p);
585e1051a39Sopenharmony_ci            }
586e1051a39Sopenharmony_ci        } else
587e1051a39Sopenharmony_ci            X = xi[j];
588e1051a39Sopenharmony_ci
589e1051a39Sopenharmony_ci        for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
590e1051a39Sopenharmony_ci            u64 M = (u64)(X >> (8 * sizeof(long) - 1));
591e1051a39Sopenharmony_ci            Z.hi ^= V.hi & M;
592e1051a39Sopenharmony_ci            Z.lo ^= V.lo & M;
593e1051a39Sopenharmony_ci
594e1051a39Sopenharmony_ci            REDUCE1BIT(V);
595e1051a39Sopenharmony_ci        }
596e1051a39Sopenharmony_ci    }
597e1051a39Sopenharmony_ci
598e1051a39Sopenharmony_ci    if (IS_LITTLE_ENDIAN) {
599e1051a39Sopenharmony_ci# ifdef BSWAP8
600e1051a39Sopenharmony_ci        Xi[0] = BSWAP8(Z.hi);
601e1051a39Sopenharmony_ci        Xi[1] = BSWAP8(Z.lo);
602e1051a39Sopenharmony_ci# else
603e1051a39Sopenharmony_ci        u8 *p = (u8 *)Xi;
604e1051a39Sopenharmony_ci        u32 v;
605e1051a39Sopenharmony_ci        v = (u32)(Z.hi >> 32);
606e1051a39Sopenharmony_ci        PUTU32(p, v);
607e1051a39Sopenharmony_ci        v = (u32)(Z.hi);
608e1051a39Sopenharmony_ci        PUTU32(p + 4, v);
609e1051a39Sopenharmony_ci        v = (u32)(Z.lo >> 32);
610e1051a39Sopenharmony_ci        PUTU32(p + 8, v);
611e1051a39Sopenharmony_ci        v = (u32)(Z.lo);
612e1051a39Sopenharmony_ci        PUTU32(p + 12, v);
613e1051a39Sopenharmony_ci# endif
614e1051a39Sopenharmony_ci    } else {
615e1051a39Sopenharmony_ci        Xi[0] = Z.hi;
616e1051a39Sopenharmony_ci        Xi[1] = Z.lo;
617e1051a39Sopenharmony_ci    }
618e1051a39Sopenharmony_ci}
619e1051a39Sopenharmony_ci
620e1051a39Sopenharmony_ci# define GCM_MUL(ctx)      gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
621e1051a39Sopenharmony_ci
622e1051a39Sopenharmony_ci#endif
623e1051a39Sopenharmony_ci
/*
 * Prototypes for alternative GHASH back ends, selected per target
 * architecture. Every branch that declares alternatives also defines
 * GCM_FUNCREF_4BIT.
 */
#if     TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
# if    !defined(I386_ONLY) && \
        (defined(__i386)        || defined(__i386__)    || \
         defined(__x86_64)      || defined(__x86_64__)  || \
         defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
#  define GHASH_ASM_X86_OR_64
#  define GCM_FUNCREF_4BIT

void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                     size_t len);

/* On 32-bit x86 the *_avx names are plain aliases of the *_clmul routines. */
#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define gcm_init_avx   gcm_init_clmul
#   define gcm_gmult_avx  gcm_gmult_clmul
#   define gcm_ghash_avx  gcm_ghash_clmul
#  else
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                   size_t len);
#  endif

/* 32-bit x86 only: additional _mmx and _x86 variants of the 4-bit routines. */
#  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);
#  endif
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
#  include "arm_arch.h"
/* Alternatives are only declared when __ARM_MAX_ARCH__ is at least 7. */
#  if __ARM_MAX_ARCH__>=7
#   define GHASH_ASM_ARM
#   define GCM_FUNCREF_4BIT
#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
/* NEON_CAPABLE is defined for 32-bit ARM only, not for __aarch64__. */
#   if defined(__arm__) || defined(__arm)
#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
#   endif
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
#  endif
# elif defined(__sparc__) || defined(__sparc)
#  include "crypto/sparc_arch.h"
#  define GHASH_ASM_SPARC
#  define GCM_FUNCREF_4BIT
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
/* The PowerPC branch additionally requires OPENSSL_CPUID_OBJ. */
# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
#  include "crypto/ppc_arch.h"
#  define GHASH_ASM_PPC
#  define GCM_FUNCREF_4BIT
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
# endif
#endif
694e1051a39Sopenharmony_ci
#ifdef GCM_FUNCREF_4BIT
/*
 * When alternative back ends exist, GCM_MUL and GHASH are rerouted through
 * the function pointers gcm_gmult_p / gcm_ghash_p, which every function
 * using these macros must declare locally. GHASH is only redefined if it
 * was defined before. The ctx argument is parenthesized so that any
 * pointer-valued expression can be passed safely.
 */
# undef  GCM_MUL
# define GCM_MUL(ctx)           (*gcm_gmult_p)((ctx)->Xi.u,(ctx)->Htable)
# ifdef GHASH
#  undef  GHASH
#  define GHASH(ctx,in,len)     (*gcm_ghash_p)((ctx)->Xi.u,(ctx)->Htable,in,len)
# endif
#endif
703e1051a39Sopenharmony_ci
/*
 * Initialise a GCM context for the given block function and key.
 *
 * ctx:   context to set up; it is zeroed first, so all previous state is lost.
 * key:   opaque key pointer, stored in ctx and handed to |block| unchanged.
 * block: 128-bit block function, stored in ctx.
 *
 * The hash key H is obtained by running |block| in place over the zeroed
 * ctx->H buffer. On little-endian hosts H is then byte-swapped so that
 * ctx->H.u[] holds it in host byte order. Finally the multiplication table
 * is built and, where alternative back ends were declared, ctx->gmult and
 * ctx->ghash are pointed at the implementation selected from compile-time
 * options and the run-time CPU capability words.
 */
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
{
    DECLARE_IS_ENDIAN;

    memset(ctx, 0, sizeof(*ctx));
    ctx->block = block;
    ctx->key = key;

    /* ctx->H is all zero after the memset; encrypt it in place to get H. */
    (*block) (ctx->H.c, ctx->H.c, key);

    if (IS_LITTLE_ENDIAN) {
        /* H is stored in host byte order */
#ifdef BSWAP8
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
        u8 *p = ctx->H.c;
        u64 hi, lo;
        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
        ctx->H.u[0] = hi;
        ctx->H.u[1] = lo;
#endif
    }
#if     TABLE_BITS==8
    gcm_init_8bit(ctx->Htable, ctx->H.u);
#elif   TABLE_BITS==4
    /*
     * CTX__GHASH stores the bulk hash routine only when the GHASH macro
     * exists; otherwise ctx->ghash is explicitly set to NULL.
     */
# if    defined(GHASH)
#  define CTX__GHASH(f) (ctx->ghash = (f))
# else
#  define CTX__GHASH(f) (ctx->ghash = NULL)
# endif
# if    defined(GHASH_ASM_X86_OR_64)
#  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
            gcm_init_avx(ctx->Htable, ctx->H.u);
            ctx->gmult = gcm_gmult_avx;
            CTX__GHASH(gcm_ghash_avx);
        } else {
            gcm_init_clmul(ctx->Htable, ctx->H.u);
            ctx->gmult = gcm_gmult_clmul;
            CTX__GHASH(gcm_ghash_clmul);
        }
        /* Back end chosen; skip the 4-bit table setup below. */
        return;
    }
#  endif
    gcm_init_4bit(ctx->Htable, ctx->H.u);
#  if   defined(GHASH_ASM_X86)  /* x86 only */
    /* The opening "if (...) {" differs per build; both share the body below. */
#   if  defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
#   else
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
#   endif
        ctx->gmult = gcm_gmult_4bit_mmx;
        CTX__GHASH(gcm_ghash_4bit_mmx);
    } else {
        ctx->gmult = gcm_gmult_4bit_x86;
        CTX__GHASH(gcm_ghash_4bit_x86);
    }
#  else
    ctx->gmult = gcm_gmult_4bit;
    CTX__GHASH(gcm_ghash_4bit);
#  endif
# elif  defined(GHASH_ASM_ARM)
    /*
     * The capability tests chain through dangling "else" keywords, so the
     * braced 4-bit fallback runs only when no enabled test succeeds.
     */
#  ifdef PMULL_CAPABLE
    if (PMULL_CAPABLE) {
        gcm_init_v8(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_v8;
        CTX__GHASH(gcm_ghash_v8);
    } else
#  endif
#  ifdef NEON_CAPABLE
    if (NEON_CAPABLE) {
        gcm_init_neon(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_neon;
        CTX__GHASH(gcm_ghash_neon);
    } else
#  endif
    {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        CTX__GHASH(gcm_ghash_4bit);
    }
# elif  defined(GHASH_ASM_SPARC)
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
        gcm_init_vis3(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_vis3;
        CTX__GHASH(gcm_ghash_vis3);
    } else {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        CTX__GHASH(gcm_ghash_4bit);
    }
# elif  defined(GHASH_ASM_PPC)
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
        gcm_init_p8(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_p8;
        CTX__GHASH(gcm_ghash_p8);
    } else {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        CTX__GHASH(gcm_ghash_4bit);
    }
# else
    /*
     * No alternative back end: only the table is built. ctx->gmult and
     * ctx->ghash keep the zero value left by the memset above.
     */
    gcm_init_4bit(ctx->Htable, ctx->H.u);
# endif
# undef CTX__GHASH
#endif
}
814e1051a39Sopenharmony_ci
815e1051a39Sopenharmony_civoid CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
816e1051a39Sopenharmony_ci                         size_t len)
817e1051a39Sopenharmony_ci{
818e1051a39Sopenharmony_ci    DECLARE_IS_ENDIAN;
819e1051a39Sopenharmony_ci    unsigned int ctr;
820e1051a39Sopenharmony_ci#ifdef GCM_FUNCREF_4BIT
821e1051a39Sopenharmony_ci    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
822e1051a39Sopenharmony_ci#endif
823e1051a39Sopenharmony_ci
824e1051a39Sopenharmony_ci    ctx->len.u[0] = 0;          /* AAD length */
825e1051a39Sopenharmony_ci    ctx->len.u[1] = 0;          /* message length */
826e1051a39Sopenharmony_ci    ctx->ares = 0;
827e1051a39Sopenharmony_ci    ctx->mres = 0;
828e1051a39Sopenharmony_ci
829e1051a39Sopenharmony_ci    if (len == 12) {
830e1051a39Sopenharmony_ci        memcpy(ctx->Yi.c, iv, 12);
831e1051a39Sopenharmony_ci        ctx->Yi.c[12] = 0;
832e1051a39Sopenharmony_ci        ctx->Yi.c[13] = 0;
833e1051a39Sopenharmony_ci        ctx->Yi.c[14] = 0;
834e1051a39Sopenharmony_ci        ctx->Yi.c[15] = 1;
835e1051a39Sopenharmony_ci        ctr = 1;
836e1051a39Sopenharmony_ci    } else {
837e1051a39Sopenharmony_ci        size_t i;
838e1051a39Sopenharmony_ci        u64 len0 = len;
839e1051a39Sopenharmony_ci
840e1051a39Sopenharmony_ci        /* Borrow ctx->Xi to calculate initial Yi */
841e1051a39Sopenharmony_ci        ctx->Xi.u[0] = 0;
842e1051a39Sopenharmony_ci        ctx->Xi.u[1] = 0;
843e1051a39Sopenharmony_ci
844e1051a39Sopenharmony_ci        while (len >= 16) {
845e1051a39Sopenharmony_ci            for (i = 0; i < 16; ++i)
846e1051a39Sopenharmony_ci                ctx->Xi.c[i] ^= iv[i];
847e1051a39Sopenharmony_ci            GCM_MUL(ctx);
848e1051a39Sopenharmony_ci            iv += 16;
849e1051a39Sopenharmony_ci            len -= 16;
850e1051a39Sopenharmony_ci        }
851e1051a39Sopenharmony_ci        if (len) {
852e1051a39Sopenharmony_ci            for (i = 0; i < len; ++i)
853e1051a39Sopenharmony_ci                ctx->Xi.c[i] ^= iv[i];
854e1051a39Sopenharmony_ci            GCM_MUL(ctx);
855e1051a39Sopenharmony_ci        }
856e1051a39Sopenharmony_ci        len0 <<= 3;
857e1051a39Sopenharmony_ci        if (IS_LITTLE_ENDIAN) {
858e1051a39Sopenharmony_ci#ifdef BSWAP8
859e1051a39Sopenharmony_ci            ctx->Xi.u[1] ^= BSWAP8(len0);
860e1051a39Sopenharmony_ci#else
861e1051a39Sopenharmony_ci            ctx->Xi.c[8] ^= (u8)(len0 >> 56);
862e1051a39Sopenharmony_ci            ctx->Xi.c[9] ^= (u8)(len0 >> 48);
863e1051a39Sopenharmony_ci            ctx->Xi.c[10] ^= (u8)(len0 >> 40);
864e1051a39Sopenharmony_ci            ctx->Xi.c[11] ^= (u8)(len0 >> 32);
865e1051a39Sopenharmony_ci            ctx->Xi.c[12] ^= (u8)(len0 >> 24);
866e1051a39Sopenharmony_ci            ctx->Xi.c[13] ^= (u8)(len0 >> 16);
867e1051a39Sopenharmony_ci            ctx->Xi.c[14] ^= (u8)(len0 >> 8);
868e1051a39Sopenharmony_ci            ctx->Xi.c[15] ^= (u8)(len0);
869e1051a39Sopenharmony_ci#endif
870e1051a39Sopenharmony_ci        } else {
871e1051a39Sopenharmony_ci            ctx->Xi.u[1] ^= len0;
872e1051a39Sopenharmony_ci        }
873e1051a39Sopenharmony_ci
874e1051a39Sopenharmony_ci        GCM_MUL(ctx);
875e1051a39Sopenharmony_ci
876e1051a39Sopenharmony_ci        if (IS_LITTLE_ENDIAN)
877e1051a39Sopenharmony_ci#ifdef BSWAP4
878e1051a39Sopenharmony_ci            ctr = BSWAP4(ctx->Xi.d[3]);
879e1051a39Sopenharmony_ci#else
880e1051a39Sopenharmony_ci            ctr = GETU32(ctx->Xi.c + 12);
881e1051a39Sopenharmony_ci#endif
882e1051a39Sopenharmony_ci        else
883e1051a39Sopenharmony_ci            ctr = ctx->Xi.d[3];
884e1051a39Sopenharmony_ci
885e1051a39Sopenharmony_ci        /* Copy borrowed Xi to Yi */
886e1051a39Sopenharmony_ci        ctx->Yi.u[0] = ctx->Xi.u[0];
887e1051a39Sopenharmony_ci        ctx->Yi.u[1] = ctx->Xi.u[1];
888e1051a39Sopenharmony_ci    }
889e1051a39Sopenharmony_ci
890e1051a39Sopenharmony_ci    ctx->Xi.u[0] = 0;
891e1051a39Sopenharmony_ci    ctx->Xi.u[1] = 0;
892e1051a39Sopenharmony_ci
893e1051a39Sopenharmony_ci    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
894e1051a39Sopenharmony_ci    ++ctr;
895e1051a39Sopenharmony_ci    if (IS_LITTLE_ENDIAN)
896e1051a39Sopenharmony_ci#ifdef BSWAP4
897e1051a39Sopenharmony_ci        ctx->Yi.d[3] = BSWAP4(ctr);
898e1051a39Sopenharmony_ci#else
899e1051a39Sopenharmony_ci        PUTU32(ctx->Yi.c + 12, ctr);
900e1051a39Sopenharmony_ci#endif
901e1051a39Sopenharmony_ci    else
902e1051a39Sopenharmony_ci        ctx->Yi.d[3] = ctr;
903e1051a39Sopenharmony_ci}
904e1051a39Sopenharmony_ci
905e1051a39Sopenharmony_ciint CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
906e1051a39Sopenharmony_ci                      size_t len)
907e1051a39Sopenharmony_ci{
908e1051a39Sopenharmony_ci    size_t i;
909e1051a39Sopenharmony_ci    unsigned int n;
910e1051a39Sopenharmony_ci    u64 alen = ctx->len.u[0];
911e1051a39Sopenharmony_ci#ifdef GCM_FUNCREF_4BIT
912e1051a39Sopenharmony_ci    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
913e1051a39Sopenharmony_ci# ifdef GHASH
914e1051a39Sopenharmony_ci    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
915e1051a39Sopenharmony_ci                         const u8 *inp, size_t len) = ctx->ghash;
916e1051a39Sopenharmony_ci# endif
917e1051a39Sopenharmony_ci#endif
918e1051a39Sopenharmony_ci
919e1051a39Sopenharmony_ci    if (ctx->len.u[1])
920e1051a39Sopenharmony_ci        return -2;
921e1051a39Sopenharmony_ci
922e1051a39Sopenharmony_ci    alen += len;
923e1051a39Sopenharmony_ci    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
924e1051a39Sopenharmony_ci        return -1;
925e1051a39Sopenharmony_ci    ctx->len.u[0] = alen;
926e1051a39Sopenharmony_ci
927e1051a39Sopenharmony_ci    n = ctx->ares;
928e1051a39Sopenharmony_ci    if (n) {
929e1051a39Sopenharmony_ci        while (n && len) {
930e1051a39Sopenharmony_ci            ctx->Xi.c[n] ^= *(aad++);
931e1051a39Sopenharmony_ci            --len;
932e1051a39Sopenharmony_ci            n = (n + 1) % 16;
933e1051a39Sopenharmony_ci        }
934e1051a39Sopenharmony_ci        if (n == 0)
935e1051a39Sopenharmony_ci            GCM_MUL(ctx);
936e1051a39Sopenharmony_ci        else {
937e1051a39Sopenharmony_ci            ctx->ares = n;
938e1051a39Sopenharmony_ci            return 0;
939e1051a39Sopenharmony_ci        }
940e1051a39Sopenharmony_ci    }
941e1051a39Sopenharmony_ci#ifdef GHASH
942e1051a39Sopenharmony_ci    if ((i = (len & (size_t)-16))) {
943e1051a39Sopenharmony_ci        GHASH(ctx, aad, i);
944e1051a39Sopenharmony_ci        aad += i;
945e1051a39Sopenharmony_ci        len -= i;
946e1051a39Sopenharmony_ci    }
947e1051a39Sopenharmony_ci#else
948e1051a39Sopenharmony_ci    while (len >= 16) {
949e1051a39Sopenharmony_ci        for (i = 0; i < 16; ++i)
950e1051a39Sopenharmony_ci            ctx->Xi.c[i] ^= aad[i];
951e1051a39Sopenharmony_ci        GCM_MUL(ctx);
952e1051a39Sopenharmony_ci        aad += 16;
953e1051a39Sopenharmony_ci        len -= 16;
954e1051a39Sopenharmony_ci    }
955e1051a39Sopenharmony_ci#endif
956e1051a39Sopenharmony_ci    if (len) {
957e1051a39Sopenharmony_ci        n = (unsigned int)len;
958e1051a39Sopenharmony_ci        for (i = 0; i < len; ++i)
959e1051a39Sopenharmony_ci            ctx->Xi.c[i] ^= aad[i];
960e1051a39Sopenharmony_ci    }
961e1051a39Sopenharmony_ci
962e1051a39Sopenharmony_ci    ctx->ares = n;
963e1051a39Sopenharmony_ci    return 0;
964e1051a39Sopenharmony_ci}
965e1051a39Sopenharmony_ci
966e1051a39Sopenharmony_ciint CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
967e1051a39Sopenharmony_ci                          const unsigned char *in, unsigned char *out,
968e1051a39Sopenharmony_ci                          size_t len)
969e1051a39Sopenharmony_ci{
970e1051a39Sopenharmony_ci    DECLARE_IS_ENDIAN;
971e1051a39Sopenharmony_ci    unsigned int n, ctr, mres;
972e1051a39Sopenharmony_ci    size_t i;
973e1051a39Sopenharmony_ci    u64 mlen = ctx->len.u[1];
974e1051a39Sopenharmony_ci    block128_f block = ctx->block;
975e1051a39Sopenharmony_ci    void *key = ctx->key;
976e1051a39Sopenharmony_ci#ifdef GCM_FUNCREF_4BIT
977e1051a39Sopenharmony_ci    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
978e1051a39Sopenharmony_ci# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
979e1051a39Sopenharmony_ci    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
980e1051a39Sopenharmony_ci                         const u8 *inp, size_t len) = ctx->ghash;
981e1051a39Sopenharmony_ci# endif
982e1051a39Sopenharmony_ci#endif
983e1051a39Sopenharmony_ci
984e1051a39Sopenharmony_ci    mlen += len;
985e1051a39Sopenharmony_ci    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
986e1051a39Sopenharmony_ci        return -1;
987e1051a39Sopenharmony_ci    ctx->len.u[1] = mlen;
988e1051a39Sopenharmony_ci
989e1051a39Sopenharmony_ci    mres = ctx->mres;
990e1051a39Sopenharmony_ci
991e1051a39Sopenharmony_ci    if (ctx->ares) {
992e1051a39Sopenharmony_ci        /* First call to encrypt finalizes GHASH(AAD) */
993e1051a39Sopenharmony_ci#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
994e1051a39Sopenharmony_ci        if (len == 0) {
995e1051a39Sopenharmony_ci            GCM_MUL(ctx);
996e1051a39Sopenharmony_ci            ctx->ares = 0;
997e1051a39Sopenharmony_ci            return 0;
998e1051a39Sopenharmony_ci        }
999e1051a39Sopenharmony_ci        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1000e1051a39Sopenharmony_ci        ctx->Xi.u[0] = 0;
1001e1051a39Sopenharmony_ci        ctx->Xi.u[1] = 0;
1002e1051a39Sopenharmony_ci        mres = sizeof(ctx->Xi);
1003e1051a39Sopenharmony_ci#else
1004e1051a39Sopenharmony_ci        GCM_MUL(ctx);
1005e1051a39Sopenharmony_ci#endif
1006e1051a39Sopenharmony_ci        ctx->ares = 0;
1007e1051a39Sopenharmony_ci    }
1008e1051a39Sopenharmony_ci
1009e1051a39Sopenharmony_ci    if (IS_LITTLE_ENDIAN)
1010e1051a39Sopenharmony_ci#ifdef BSWAP4
1011e1051a39Sopenharmony_ci        ctr = BSWAP4(ctx->Yi.d[3]);
1012e1051a39Sopenharmony_ci#else
1013e1051a39Sopenharmony_ci        ctr = GETU32(ctx->Yi.c + 12);
1014e1051a39Sopenharmony_ci#endif
1015e1051a39Sopenharmony_ci    else
1016e1051a39Sopenharmony_ci        ctr = ctx->Yi.d[3];
1017e1051a39Sopenharmony_ci
1018e1051a39Sopenharmony_ci    n = mres % 16;
1019e1051a39Sopenharmony_ci#if !defined(OPENSSL_SMALL_FOOTPRINT)
1020e1051a39Sopenharmony_ci    if (16 % sizeof(size_t) == 0) { /* always true actually */
1021e1051a39Sopenharmony_ci        do {
1022e1051a39Sopenharmony_ci            if (n) {
1023e1051a39Sopenharmony_ci# if defined(GHASH)
1024e1051a39Sopenharmony_ci                while (n && len) {
1025e1051a39Sopenharmony_ci                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
1026e1051a39Sopenharmony_ci                    --len;
1027e1051a39Sopenharmony_ci                    n = (n + 1) % 16;
1028e1051a39Sopenharmony_ci                }
1029e1051a39Sopenharmony_ci                if (n == 0) {
1030e1051a39Sopenharmony_ci                    GHASH(ctx, ctx->Xn, mres);
1031e1051a39Sopenharmony_ci                    mres = 0;
1032e1051a39Sopenharmony_ci                } else {
1033e1051a39Sopenharmony_ci                    ctx->mres = mres;
1034e1051a39Sopenharmony_ci                    return 0;
1035e1051a39Sopenharmony_ci                }
1036e1051a39Sopenharmony_ci# else
1037e1051a39Sopenharmony_ci                while (n && len) {
1038e1051a39Sopenharmony_ci                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1039e1051a39Sopenharmony_ci                    --len;
1040e1051a39Sopenharmony_ci                    n = (n + 1) % 16;
1041e1051a39Sopenharmony_ci                }
1042e1051a39Sopenharmony_ci                if (n == 0) {
1043e1051a39Sopenharmony_ci                    GCM_MUL(ctx);
1044e1051a39Sopenharmony_ci                    mres = 0;
1045e1051a39Sopenharmony_ci                } else {
1046e1051a39Sopenharmony_ci                    ctx->mres = n;
1047e1051a39Sopenharmony_ci                    return 0;
1048e1051a39Sopenharmony_ci                }
1049e1051a39Sopenharmony_ci# endif
1050e1051a39Sopenharmony_ci            }
1051e1051a39Sopenharmony_ci# if defined(STRICT_ALIGNMENT)
1052e1051a39Sopenharmony_ci            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1053e1051a39Sopenharmony_ci                break;
1054e1051a39Sopenharmony_ci# endif
1055e1051a39Sopenharmony_ci# if defined(GHASH)
1056e1051a39Sopenharmony_ci            if (len >= 16 && mres) {
1057e1051a39Sopenharmony_ci                GHASH(ctx, ctx->Xn, mres);
1058e1051a39Sopenharmony_ci                mres = 0;
1059e1051a39Sopenharmony_ci            }
1060e1051a39Sopenharmony_ci#  if defined(GHASH_CHUNK)
1061e1051a39Sopenharmony_ci            while (len >= GHASH_CHUNK) {
1062e1051a39Sopenharmony_ci                size_t j = GHASH_CHUNK;
1063e1051a39Sopenharmony_ci
1064e1051a39Sopenharmony_ci                while (j) {
1065e1051a39Sopenharmony_ci                    size_t_aX *out_t = (size_t_aX *)out;
1066e1051a39Sopenharmony_ci                    const size_t_aX *in_t = (const size_t_aX *)in;
1067e1051a39Sopenharmony_ci
1068e1051a39Sopenharmony_ci                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1069e1051a39Sopenharmony_ci                    ++ctr;
1070e1051a39Sopenharmony_ci                    if (IS_LITTLE_ENDIAN)
1071e1051a39Sopenharmony_ci#   ifdef BSWAP4
1072e1051a39Sopenharmony_ci                        ctx->Yi.d[3] = BSWAP4(ctr);
1073e1051a39Sopenharmony_ci#   else
1074e1051a39Sopenharmony_ci                        PUTU32(ctx->Yi.c + 12, ctr);
1075e1051a39Sopenharmony_ci#   endif
1076e1051a39Sopenharmony_ci                    else
1077e1051a39Sopenharmony_ci                        ctx->Yi.d[3] = ctr;
1078e1051a39Sopenharmony_ci                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1079e1051a39Sopenharmony_ci                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1080e1051a39Sopenharmony_ci                    out += 16;
1081e1051a39Sopenharmony_ci                    in += 16;
1082e1051a39Sopenharmony_ci                    j -= 16;
1083e1051a39Sopenharmony_ci                }
1084e1051a39Sopenharmony_ci                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
1085e1051a39Sopenharmony_ci                len -= GHASH_CHUNK;
1086e1051a39Sopenharmony_ci            }
1087e1051a39Sopenharmony_ci#  endif
1088e1051a39Sopenharmony_ci            if ((i = (len & (size_t)-16))) {
1089e1051a39Sopenharmony_ci                size_t j = i;
1090e1051a39Sopenharmony_ci
1091e1051a39Sopenharmony_ci                while (len >= 16) {
1092e1051a39Sopenharmony_ci                    size_t_aX *out_t = (size_t_aX *)out;
1093e1051a39Sopenharmony_ci                    const size_t_aX *in_t = (const size_t_aX *)in;
1094e1051a39Sopenharmony_ci
1095e1051a39Sopenharmony_ci                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1096e1051a39Sopenharmony_ci                    ++ctr;
1097e1051a39Sopenharmony_ci                    if (IS_LITTLE_ENDIAN)
1098e1051a39Sopenharmony_ci#  ifdef BSWAP4
1099e1051a39Sopenharmony_ci                        ctx->Yi.d[3] = BSWAP4(ctr);
1100e1051a39Sopenharmony_ci#  else
1101e1051a39Sopenharmony_ci                        PUTU32(ctx->Yi.c + 12, ctr);
1102e1051a39Sopenharmony_ci#  endif
1103e1051a39Sopenharmony_ci                    else
1104e1051a39Sopenharmony_ci                        ctx->Yi.d[3] = ctr;
1105e1051a39Sopenharmony_ci                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1106e1051a39Sopenharmony_ci                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1107e1051a39Sopenharmony_ci                    out += 16;
1108e1051a39Sopenharmony_ci                    in += 16;
1109e1051a39Sopenharmony_ci                    len -= 16;
1110e1051a39Sopenharmony_ci                }
1111e1051a39Sopenharmony_ci                GHASH(ctx, out - j, j);
1112e1051a39Sopenharmony_ci            }
1113e1051a39Sopenharmony_ci# else
1114e1051a39Sopenharmony_ci            while (len >= 16) {
1115e1051a39Sopenharmony_ci                size_t *out_t = (size_t *)out;
1116e1051a39Sopenharmony_ci                const size_t *in_t = (const size_t *)in;
1117e1051a39Sopenharmony_ci
1118e1051a39Sopenharmony_ci                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1119e1051a39Sopenharmony_ci                ++ctr;
1120e1051a39Sopenharmony_ci                if (IS_LITTLE_ENDIAN)
1121e1051a39Sopenharmony_ci#  ifdef BSWAP4
1122e1051a39Sopenharmony_ci                    ctx->Yi.d[3] = BSWAP4(ctr);
1123e1051a39Sopenharmony_ci#  else
1124e1051a39Sopenharmony_ci                    PUTU32(ctx->Yi.c + 12, ctr);
1125e1051a39Sopenharmony_ci#  endif
1126e1051a39Sopenharmony_ci                else
1127e1051a39Sopenharmony_ci                    ctx->Yi.d[3] = ctr;
1128e1051a39Sopenharmony_ci                for (i = 0; i < 16 / sizeof(size_t); ++i)
1129e1051a39Sopenharmony_ci                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1130e1051a39Sopenharmony_ci                GCM_MUL(ctx);
1131e1051a39Sopenharmony_ci                out += 16;
1132e1051a39Sopenharmony_ci                in += 16;
1133e1051a39Sopenharmony_ci                len -= 16;
1134e1051a39Sopenharmony_ci            }
1135e1051a39Sopenharmony_ci# endif
1136e1051a39Sopenharmony_ci            if (len) {
1137e1051a39Sopenharmony_ci                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1138e1051a39Sopenharmony_ci                ++ctr;
1139e1051a39Sopenharmony_ci                if (IS_LITTLE_ENDIAN)
1140e1051a39Sopenharmony_ci# ifdef BSWAP4
1141e1051a39Sopenharmony_ci                    ctx->Yi.d[3] = BSWAP4(ctr);
1142e1051a39Sopenharmony_ci# else
1143e1051a39Sopenharmony_ci                    PUTU32(ctx->Yi.c + 12, ctr);
1144e1051a39Sopenharmony_ci# endif
1145e1051a39Sopenharmony_ci                else
1146e1051a39Sopenharmony_ci                    ctx->Yi.d[3] = ctr;
1147e1051a39Sopenharmony_ci# if defined(GHASH)
1148e1051a39Sopenharmony_ci                while (len--) {
1149e1051a39Sopenharmony_ci                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
1150e1051a39Sopenharmony_ci                    ++n;
1151e1051a39Sopenharmony_ci                }
1152e1051a39Sopenharmony_ci# else
1153e1051a39Sopenharmony_ci                while (len--) {
1154e1051a39Sopenharmony_ci                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1155e1051a39Sopenharmony_ci                    ++n;
1156e1051a39Sopenharmony_ci                }
1157e1051a39Sopenharmony_ci                mres = n;
1158e1051a39Sopenharmony_ci# endif
1159e1051a39Sopenharmony_ci            }
1160e1051a39Sopenharmony_ci
1161e1051a39Sopenharmony_ci            ctx->mres = mres;
1162e1051a39Sopenharmony_ci            return 0;
1163e1051a39Sopenharmony_ci        } while (0);
1164e1051a39Sopenharmony_ci    }
1165e1051a39Sopenharmony_ci#endif
1166e1051a39Sopenharmony_ci    for (i = 0; i < len; ++i) {
1167e1051a39Sopenharmony_ci        if (n == 0) {
1168e1051a39Sopenharmony_ci            (*block) (ctx->Yi.c, ctx->EKi.c, key);
1169e1051a39Sopenharmony_ci            ++ctr;
1170e1051a39Sopenharmony_ci            if (IS_LITTLE_ENDIAN)
1171e1051a39Sopenharmony_ci#ifdef BSWAP4
1172e1051a39Sopenharmony_ci                ctx->Yi.d[3] = BSWAP4(ctr);
1173e1051a39Sopenharmony_ci#else
1174e1051a39Sopenharmony_ci                PUTU32(ctx->Yi.c + 12, ctr);
1175e1051a39Sopenharmony_ci#endif
1176e1051a39Sopenharmony_ci            else
1177e1051a39Sopenharmony_ci                ctx->Yi.d[3] = ctr;
1178e1051a39Sopenharmony_ci        }
1179e1051a39Sopenharmony_ci#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1180e1051a39Sopenharmony_ci        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
1181e1051a39Sopenharmony_ci        n = (n + 1) % 16;
1182e1051a39Sopenharmony_ci        if (mres == sizeof(ctx->Xn)) {
1183e1051a39Sopenharmony_ci            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
1184e1051a39Sopenharmony_ci            mres = 0;
1185e1051a39Sopenharmony_ci        }
1186e1051a39Sopenharmony_ci#else
1187e1051a39Sopenharmony_ci        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
1188e1051a39Sopenharmony_ci        mres = n = (n + 1) % 16;
1189e1051a39Sopenharmony_ci        if (n == 0)
1190e1051a39Sopenharmony_ci            GCM_MUL(ctx);
1191e1051a39Sopenharmony_ci#endif
1192e1051a39Sopenharmony_ci    }
1193e1051a39Sopenharmony_ci
1194e1051a39Sopenharmony_ci    ctx->mres = mres;
1195e1051a39Sopenharmony_ci    return 0;
1196e1051a39Sopenharmony_ci}
1197e1051a39Sopenharmony_ci
/*
 * CRYPTO_gcm128_decrypt - decrypt |len| bytes from |in| into |out| and fold
 * the input (ciphertext) bytes into the authentication state held in |ctx|.
 *
 * Keystream is produced 16 bytes at a time by calling ctx->block on ctx->Yi
 * with the result stored in ctx->EKi. After every block call the local 32-bit
 * counter is incremented and written back to the last four bytes of Yi
 * (byte-swapped on little-endian hosts).
 *
 * The function may be called repeatedly: ctx->mres carries the residue
 * between calls and (mres % 16) is the offset of the next unused byte of the
 * current keystream block in EKi.
 *
 * Returns 0 on success. Returns -1, before anything in |ctx| is written, when
 * the accumulated message length ctx->len.u[1] + len exceeds 2^36 - 32 bytes
 * or wraps around.
 */
1198e1051a39Sopenharmony_ciint CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1199e1051a39Sopenharmony_ci                          const unsigned char *in, unsigned char *out,
1200e1051a39Sopenharmony_ci                          size_t len)
1201e1051a39Sopenharmony_ci{
1202e1051a39Sopenharmony_ci    DECLARE_IS_ENDIAN;
1203e1051a39Sopenharmony_ci    unsigned int n, ctr, mres;
1204e1051a39Sopenharmony_ci    size_t i;
1205e1051a39Sopenharmony_ci    u64 mlen = ctx->len.u[1];
1206e1051a39Sopenharmony_ci    block128_f block = ctx->block;
1207e1051a39Sopenharmony_ci    void *key = ctx->key;
    /*
     * NOTE(review): these locals are not referenced by name in this function;
     * presumably the GCM_MUL/GHASH macros expand to calls through them when
     * GCM_FUNCREF_4BIT is defined - confirm against the macro definitions.
     */
1208e1051a39Sopenharmony_ci#ifdef GCM_FUNCREF_4BIT
1209e1051a39Sopenharmony_ci    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1210e1051a39Sopenharmony_ci# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1211e1051a39Sopenharmony_ci    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1212e1051a39Sopenharmony_ci                         const u8 *inp, size_t len) = ctx->ghash;
1213e1051a39Sopenharmony_ci# endif
1214e1051a39Sopenharmony_ci#endif
1215e1051a39Sopenharmony_ci
    /* Enforce the total-length cap and catch u64 wrap-around of the sum. */
1216e1051a39Sopenharmony_ci    mlen += len;
1217e1051a39Sopenharmony_ci    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1218e1051a39Sopenharmony_ci        return -1;
1219e1051a39Sopenharmony_ci    ctx->len.u[1] = mlen;
1220e1051a39Sopenharmony_ci
1221e1051a39Sopenharmony_ci    mres = ctx->mres;
1222e1051a39Sopenharmony_ci
1223e1051a39Sopenharmony_ci    if (ctx->ares) {
1224e1051a39Sopenharmony_ci        /* First call to decrypt finalizes GHASH(AAD) */
1225e1051a39Sopenharmony_ci#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1226e1051a39Sopenharmony_ci        if (len == 0) {
1227e1051a39Sopenharmony_ci            GCM_MUL(ctx);
1228e1051a39Sopenharmony_ci            ctx->ares = 0;
1229e1051a39Sopenharmony_ci            return 0;
1230e1051a39Sopenharmony_ci        }
        /*
         * Rather than multiplying now, move the pending Xi into the front of
         * the Xn staging buffer and clear Xi; those 16 bytes are then
         * consumed by the next GHASH call made over Xn.
         */
1231e1051a39Sopenharmony_ci        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1232e1051a39Sopenharmony_ci        ctx->Xi.u[0] = 0;
1233e1051a39Sopenharmony_ci        ctx->Xi.u[1] = 0;
1234e1051a39Sopenharmony_ci        mres = sizeof(ctx->Xi);
1235e1051a39Sopenharmony_ci#else
1236e1051a39Sopenharmony_ci        GCM_MUL(ctx);
1237e1051a39Sopenharmony_ci#endif
1238e1051a39Sopenharmony_ci        ctx->ares = 0;
1239e1051a39Sopenharmony_ci    }
1240e1051a39Sopenharmony_ci
    /* Load the 32-bit counter from the last four bytes of Yi. */
1241e1051a39Sopenharmony_ci    if (IS_LITTLE_ENDIAN)
1242e1051a39Sopenharmony_ci#ifdef BSWAP4
1243e1051a39Sopenharmony_ci        ctr = BSWAP4(ctx->Yi.d[3]);
1244e1051a39Sopenharmony_ci#else
1245e1051a39Sopenharmony_ci        ctr = GETU32(ctx->Yi.c + 12);
1246e1051a39Sopenharmony_ci#endif
1247e1051a39Sopenharmony_ci    else
1248e1051a39Sopenharmony_ci        ctr = ctx->Yi.d[3];
1249e1051a39Sopenharmony_ci
    /* n: index of the next unused keystream byte in EKi (0 = need a new block). */
1250e1051a39Sopenharmony_ci    n = mres % 16;
1251e1051a39Sopenharmony_ci#if !defined(OPENSSL_SMALL_FOOTPRINT)
1252e1051a39Sopenharmony_ci    if (16 % sizeof(size_t) == 0) { /* always true actually */
        /*
         * Word-at-a-time fast path. The do { } while (0) exists only so that
         * "break" can drop to the byte-wise loop at the end of the function.
         */
1253e1051a39Sopenharmony_ci        do {
1254e1051a39Sopenharmony_ci            if (n) {
                /*
                 * Use up what is left of the keystream block generated by an
                 * earlier call. If the input runs out before that block is
                 * exhausted, save the residue and return.
                 */
1255e1051a39Sopenharmony_ci# if defined(GHASH)
1256e1051a39Sopenharmony_ci                while (n && len) {
1257e1051a39Sopenharmony_ci                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1258e1051a39Sopenharmony_ci                    --len;
1259e1051a39Sopenharmony_ci                    n = (n + 1) % 16;
1260e1051a39Sopenharmony_ci                }
1261e1051a39Sopenharmony_ci                if (n == 0) {
1262e1051a39Sopenharmony_ci                    GHASH(ctx, ctx->Xn, mres);
1263e1051a39Sopenharmony_ci                    mres = 0;
1264e1051a39Sopenharmony_ci                } else {
1265e1051a39Sopenharmony_ci                    ctx->mres = mres;
1266e1051a39Sopenharmony_ci                    return 0;
1267e1051a39Sopenharmony_ci                }
1268e1051a39Sopenharmony_ci# else
1269e1051a39Sopenharmony_ci                while (n && len) {
1270e1051a39Sopenharmony_ci                    u8 c = *(in++);
1271e1051a39Sopenharmony_ci                    *(out++) = c ^ ctx->EKi.c[n];
1272e1051a39Sopenharmony_ci                    ctx->Xi.c[n] ^= c;
1273e1051a39Sopenharmony_ci                    --len;
1274e1051a39Sopenharmony_ci                    n = (n + 1) % 16;
1275e1051a39Sopenharmony_ci                }
1276e1051a39Sopenharmony_ci                if (n == 0) {
1277e1051a39Sopenharmony_ci                    GCM_MUL(ctx);
1278e1051a39Sopenharmony_ci                    mres = 0;
1279e1051a39Sopenharmony_ci                } else {
1280e1051a39Sopenharmony_ci                    ctx->mres = n;
1281e1051a39Sopenharmony_ci                    return 0;
1282e1051a39Sopenharmony_ci                }
1283e1051a39Sopenharmony_ci# endif
1284e1051a39Sopenharmony_ci            }
            /*
             * With STRICT_ALIGNMENT, buffers that are not size_t-aligned
             * leave the fast path and are handled by the byte-wise loop.
             */
1285e1051a39Sopenharmony_ci# if defined(STRICT_ALIGNMENT)
1286e1051a39Sopenharmony_ci            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1287e1051a39Sopenharmony_ci                break;
1288e1051a39Sopenharmony_ci# endif
1289e1051a39Sopenharmony_ci# if defined(GHASH)
            /*
             * Flush whatever is staged in Xn (for instance the Xi stashed
             * above) before hashing whole blocks directly from |in|.
             */
1290e1051a39Sopenharmony_ci            if (len >= 16 && mres) {
1291e1051a39Sopenharmony_ci                GHASH(ctx, ctx->Xn, mres);
1292e1051a39Sopenharmony_ci                mres = 0;
1293e1051a39Sopenharmony_ci            }
1294e1051a39Sopenharmony_ci#  if defined(GHASH_CHUNK)
1295e1051a39Sopenharmony_ci            while (len >= GHASH_CHUNK) {
1296e1051a39Sopenharmony_ci                size_t j = GHASH_CHUNK;
1297e1051a39Sopenharmony_ci
                /*
                 * The input is hashed before the loop below writes |out|.
                 * NOTE(review): presumably this ordering keeps in-place use
                 * (in == out) hashing ciphertext - confirm with callers.
                 */
1298e1051a39Sopenharmony_ci                GHASH(ctx, in, GHASH_CHUNK);
1299e1051a39Sopenharmony_ci                while (j) {
1300e1051a39Sopenharmony_ci                    size_t_aX *out_t = (size_t_aX *)out;
1301e1051a39Sopenharmony_ci                    const size_t_aX *in_t = (const size_t_aX *)in;
1302e1051a39Sopenharmony_ci
1303e1051a39Sopenharmony_ci                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1304e1051a39Sopenharmony_ci                    ++ctr;
1305e1051a39Sopenharmony_ci                    if (IS_LITTLE_ENDIAN)
1306e1051a39Sopenharmony_ci#   ifdef BSWAP4
1307e1051a39Sopenharmony_ci                        ctx->Yi.d[3] = BSWAP4(ctr);
1308e1051a39Sopenharmony_ci#   else
1309e1051a39Sopenharmony_ci                        PUTU32(ctx->Yi.c + 12, ctr);
1310e1051a39Sopenharmony_ci#   endif
1311e1051a39Sopenharmony_ci                    else
1312e1051a39Sopenharmony_ci                        ctx->Yi.d[3] = ctr;
1313e1051a39Sopenharmony_ci                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1314e1051a39Sopenharmony_ci                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1315e1051a39Sopenharmony_ci                    out += 16;
1316e1051a39Sopenharmony_ci                    in += 16;
1317e1051a39Sopenharmony_ci                    j -= 16;
1318e1051a39Sopenharmony_ci                }
1319e1051a39Sopenharmony_ci                len -= GHASH_CHUNK;
1320e1051a39Sopenharmony_ci            }
1321e1051a39Sopenharmony_ci#  endif
            /* Remaining whole blocks: hash them in one call, then decrypt. */
1322e1051a39Sopenharmony_ci            if ((i = (len & (size_t)-16))) {
1323e1051a39Sopenharmony_ci                GHASH(ctx, in, i);
1324e1051a39Sopenharmony_ci                while (len >= 16) {
1325e1051a39Sopenharmony_ci                    size_t_aX *out_t = (size_t_aX *)out;
1326e1051a39Sopenharmony_ci                    const size_t_aX *in_t = (const size_t_aX *)in;
1327e1051a39Sopenharmony_ci
1328e1051a39Sopenharmony_ci                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
1329e1051a39Sopenharmony_ci                    ++ctr;
1330e1051a39Sopenharmony_ci                    if (IS_LITTLE_ENDIAN)
1331e1051a39Sopenharmony_ci#  ifdef BSWAP4
1332e1051a39Sopenharmony_ci                        ctx->Yi.d[3] = BSWAP4(ctr);
1333e1051a39Sopenharmony_ci#  else
1334e1051a39Sopenharmony_ci                        PUTU32(ctx->Yi.c + 12, ctr);
1335e1051a39Sopenharmony_ci#  endif
1336e1051a39Sopenharmony_ci                    else
1337e1051a39Sopenharmony_ci                        ctx->Yi.d[3] = ctr;
1338e1051a39Sopenharmony_ci                    for (i = 0; i < 16 / sizeof(size_t); ++i)
1339e1051a39Sopenharmony_ci                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1340e1051a39Sopenharmony_ci                    out += 16;
1341e1051a39Sopenharmony_ci                    in += 16;
1342e1051a39Sopenharmony_ci                    len -= 16;
1343e1051a39Sopenharmony_ci                }
1344e1051a39Sopenharmony_ci            }
1345e1051a39Sopenharmony_ci# else
            /*
             * No GHASH macro: per block, XOR the input words into Xi, write
             * the decrypted words, then multiply with GCM_MUL.
             */
1346e1051a39Sopenharmony_ci            while (len >= 16) {
1347e1051a39Sopenharmony_ci                size_t *out_t = (size_t *)out;
1348e1051a39Sopenharmony_ci                const size_t *in_t = (const size_t *)in;
1349e1051a39Sopenharmony_ci
1350e1051a39Sopenharmony_ci                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1351e1051a39Sopenharmony_ci                ++ctr;
1352e1051a39Sopenharmony_ci                if (IS_LITTLE_ENDIAN)
1353e1051a39Sopenharmony_ci#  ifdef BSWAP4
1354e1051a39Sopenharmony_ci                    ctx->Yi.d[3] = BSWAP4(ctr);
1355e1051a39Sopenharmony_ci#  else
1356e1051a39Sopenharmony_ci                    PUTU32(ctx->Yi.c + 12, ctr);
1357e1051a39Sopenharmony_ci#  endif
1358e1051a39Sopenharmony_ci                else
1359e1051a39Sopenharmony_ci                    ctx->Yi.d[3] = ctr;
1360e1051a39Sopenharmony_ci                for (i = 0; i < 16 / sizeof(size_t); ++i) {
1361e1051a39Sopenharmony_ci                    size_t c = in_t[i];
1362e1051a39Sopenharmony_ci                    out_t[i] = c ^ ctx->EKi.t[i];
1363e1051a39Sopenharmony_ci                    ctx->Xi.t[i] ^= c;
1364e1051a39Sopenharmony_ci                }
1365e1051a39Sopenharmony_ci                GCM_MUL(ctx);
1366e1051a39Sopenharmony_ci                out += 16;
1367e1051a39Sopenharmony_ci                in += 16;
1368e1051a39Sopenharmony_ci                len -= 16;
1369e1051a39Sopenharmony_ci            }
1370e1051a39Sopenharmony_ci# endif
            /*
             * Fewer than 16 bytes remain and n is 0 here: generate one more
             * keystream block and use only its first |len| bytes. The bytes
             * are staged (Xn) or XORed into Xi without a multiply; mres
             * records the residue for the next call.
             */
1371e1051a39Sopenharmony_ci            if (len) {
1372e1051a39Sopenharmony_ci                (*block) (ctx->Yi.c, ctx->EKi.c, key);
1373e1051a39Sopenharmony_ci                ++ctr;
1374e1051a39Sopenharmony_ci                if (IS_LITTLE_ENDIAN)
1375e1051a39Sopenharmony_ci# ifdef BSWAP4
1376e1051a39Sopenharmony_ci                    ctx->Yi.d[3] = BSWAP4(ctr);
1377e1051a39Sopenharmony_ci# else
1378e1051a39Sopenharmony_ci                    PUTU32(ctx->Yi.c + 12, ctr);
1379e1051a39Sopenharmony_ci# endif
1380e1051a39Sopenharmony_ci                else
1381e1051a39Sopenharmony_ci                    ctx->Yi.d[3] = ctr;
1382e1051a39Sopenharmony_ci# if defined(GHASH)
1383e1051a39Sopenharmony_ci                while (len--) {
1384e1051a39Sopenharmony_ci                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1385e1051a39Sopenharmony_ci                    ++n;
1386e1051a39Sopenharmony_ci                }
1387e1051a39Sopenharmony_ci# else
1388e1051a39Sopenharmony_ci                while (len--) {
1389e1051a39Sopenharmony_ci                    u8 c = in[n];
1390e1051a39Sopenharmony_ci                    ctx->Xi.c[n] ^= c;
1391e1051a39Sopenharmony_ci                    out[n] = c ^ ctx->EKi.c[n];
1392e1051a39Sopenharmony_ci                    ++n;
1393e1051a39Sopenharmony_ci                }
1394e1051a39Sopenharmony_ci                mres = n;
1395e1051a39Sopenharmony_ci# endif
1396e1051a39Sopenharmony_ci            }
1397e1051a39Sopenharmony_ci
1398e1051a39Sopenharmony_ci            ctx->mres = mres;
1399e1051a39Sopenharmony_ci            return 0;
1400e1051a39Sopenharmony_ci        } while (0);
1401e1051a39Sopenharmony_ci    }
1402e1051a39Sopenharmony_ci#endif
    /*
     * Byte-wise path: the only path when OPENSSL_SMALL_FOOTPRINT is defined,
     * and the fallback after the alignment "break" above. A new keystream
     * block is generated whenever n wraps to 0.
     */
1403e1051a39Sopenharmony_ci    for (i = 0; i < len; ++i) {
1404e1051a39Sopenharmony_ci        u8 c;
1405e1051a39Sopenharmony_ci        if (n == 0) {
1406e1051a39Sopenharmony_ci            (*block) (ctx->Yi.c, ctx->EKi.c, key);
1407e1051a39Sopenharmony_ci            ++ctr;
1408e1051a39Sopenharmony_ci            if (IS_LITTLE_ENDIAN)
1409e1051a39Sopenharmony_ci#ifdef BSWAP4
1410e1051a39Sopenharmony_ci                ctx->Yi.d[3] = BSWAP4(ctr);
1411e1051a39Sopenharmony_ci#else
1412e1051a39Sopenharmony_ci                PUTU32(ctx->Yi.c + 12, ctr);
1413e1051a39Sopenharmony_ci#endif
1414e1051a39Sopenharmony_ci            else
1415e1051a39Sopenharmony_ci                ctx->Yi.d[3] = ctr;
1416e1051a39Sopenharmony_ci        }
1417e1051a39Sopenharmony_ci#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        /* Stage the input byte in Xn; hash the buffer once it is full. */
1418e1051a39Sopenharmony_ci        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
1419e1051a39Sopenharmony_ci        n = (n + 1) % 16;
1420e1051a39Sopenharmony_ci        if (mres == sizeof(ctx->Xn)) {
1421e1051a39Sopenharmony_ci            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
1422e1051a39Sopenharmony_ci            mres = 0;
1423e1051a39Sopenharmony_ci        }
1424e1051a39Sopenharmony_ci#else
        /* XOR the input byte into Xi; multiply after every 16th byte. */
1425e1051a39Sopenharmony_ci        c = in[i];
1426e1051a39Sopenharmony_ci        out[i] = c ^ ctx->EKi.c[n];
1427e1051a39Sopenharmony_ci        ctx->Xi.c[n] ^= c;
1428e1051a39Sopenharmony_ci        mres = n = (n + 1) % 16;
1429e1051a39Sopenharmony_ci        if (n == 0)
1430e1051a39Sopenharmony_ci            GCM_MUL(ctx);
1431e1051a39Sopenharmony_ci#endif
1432e1051a39Sopenharmony_ci    }
1433e1051a39Sopenharmony_ci
1434e1051a39Sopenharmony_ci    ctx->mres = mres;
1435e1051a39Sopenharmony_ci    return 0;
1436e1051a39Sopenharmony_ci}
1437e1051a39Sopenharmony_ci
/*
 * CRYPTO_gcm128_encrypt_ctr32 - encrypt |len| bytes from |in| into |out|,
 * using the caller-supplied |stream| routine for runs of whole 16-byte
 * blocks, and fold the produced output (ciphertext) into the authentication
 * state held in |ctx|.
 *
 * When OPENSSL_SMALL_FOOTPRINT is defined this simply forwards to
 * CRYPTO_gcm128_encrypt() and |stream| is not used.
 *
 * |stream| is invoked as (*stream)(in, out, nblocks, key, ctx->Yi.c). After
 * each invocation this function advances its own copy of the 32-bit counter
 * by nblocks and stores it back into the last four bytes of Yi.
 * NOTE(review): this implies |stream| does not update Yi itself - confirm
 * against the ctr128_f contract.
 *
 * Partial blocks (head residue from an earlier call, and a tail shorter than
 * 16 bytes) use the keystream in ctx->EKi; a new tail block is produced with
 * ctx->block. ctx->mres carries the residue between calls.
 *
 * Returns 0 on success. Returns -1, before anything in |ctx| is written, when
 * the accumulated message length ctx->len.u[1] + len exceeds 2^36 - 32 bytes
 * or wraps around.
 */
1438e1051a39Sopenharmony_ciint CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1439e1051a39Sopenharmony_ci                                const unsigned char *in, unsigned char *out,
1440e1051a39Sopenharmony_ci                                size_t len, ctr128_f stream)
1441e1051a39Sopenharmony_ci{
1442e1051a39Sopenharmony_ci#if defined(OPENSSL_SMALL_FOOTPRINT)
1443e1051a39Sopenharmony_ci    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
1444e1051a39Sopenharmony_ci#else
1445e1051a39Sopenharmony_ci    DECLARE_IS_ENDIAN;
1446e1051a39Sopenharmony_ci    unsigned int n, ctr, mres;
1447e1051a39Sopenharmony_ci    size_t i;
1448e1051a39Sopenharmony_ci    u64 mlen = ctx->len.u[1];
1449e1051a39Sopenharmony_ci    void *key = ctx->key;
    /*
     * NOTE(review): these locals are not referenced by name in this function;
     * presumably the GCM_MUL/GHASH macros expand to calls through them when
     * GCM_FUNCREF_4BIT is defined - confirm against the macro definitions.
     */
1450e1051a39Sopenharmony_ci# ifdef GCM_FUNCREF_4BIT
1451e1051a39Sopenharmony_ci    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1452e1051a39Sopenharmony_ci#  ifdef GHASH
1453e1051a39Sopenharmony_ci    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1454e1051a39Sopenharmony_ci                         const u8 *inp, size_t len) = ctx->ghash;
1455e1051a39Sopenharmony_ci#  endif
1456e1051a39Sopenharmony_ci# endif
1457e1051a39Sopenharmony_ci
    /* Enforce the total-length cap and catch u64 wrap-around of the sum. */
1458e1051a39Sopenharmony_ci    mlen += len;
1459e1051a39Sopenharmony_ci    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1460e1051a39Sopenharmony_ci        return -1;
1461e1051a39Sopenharmony_ci    ctx->len.u[1] = mlen;
1462e1051a39Sopenharmony_ci
1463e1051a39Sopenharmony_ci    mres = ctx->mres;
1464e1051a39Sopenharmony_ci
1465e1051a39Sopenharmony_ci    if (ctx->ares) {
1466e1051a39Sopenharmony_ci        /* First call to encrypt finalizes GHASH(AAD) */
1467e1051a39Sopenharmony_ci#if defined(GHASH)
1468e1051a39Sopenharmony_ci        if (len == 0) {
1469e1051a39Sopenharmony_ci            GCM_MUL(ctx);
1470e1051a39Sopenharmony_ci            ctx->ares = 0;
1471e1051a39Sopenharmony_ci            return 0;
1472e1051a39Sopenharmony_ci        }
        /*
         * Rather than multiplying now, move the pending Xi into the front of
         * the Xn staging buffer and clear Xi; those 16 bytes are then
         * consumed by the next GHASH call made over Xn.
         */
1473e1051a39Sopenharmony_ci        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1474e1051a39Sopenharmony_ci        ctx->Xi.u[0] = 0;
1475e1051a39Sopenharmony_ci        ctx->Xi.u[1] = 0;
1476e1051a39Sopenharmony_ci        mres = sizeof(ctx->Xi);
1477e1051a39Sopenharmony_ci#else
1478e1051a39Sopenharmony_ci        GCM_MUL(ctx);
1479e1051a39Sopenharmony_ci#endif
1480e1051a39Sopenharmony_ci        ctx->ares = 0;
1481e1051a39Sopenharmony_ci    }
1482e1051a39Sopenharmony_ci
    /* Load the 32-bit counter from the last four bytes of Yi. */
1483e1051a39Sopenharmony_ci    if (IS_LITTLE_ENDIAN)
1484e1051a39Sopenharmony_ci# ifdef BSWAP4
1485e1051a39Sopenharmony_ci        ctr = BSWAP4(ctx->Yi.d[3]);
1486e1051a39Sopenharmony_ci# else
1487e1051a39Sopenharmony_ci        ctr = GETU32(ctx->Yi.c + 12);
1488e1051a39Sopenharmony_ci# endif
1489e1051a39Sopenharmony_ci    else
1490e1051a39Sopenharmony_ci        ctr = ctx->Yi.d[3];
1491e1051a39Sopenharmony_ci
    /* n: index of the next unused keystream byte in EKi (0 = need a new block). */
1492e1051a39Sopenharmony_ci    n = mres % 16;
1493e1051a39Sopenharmony_ci    if (n) {
        /*
         * Use up what is left of the keystream block generated by an earlier
         * call. If the input runs out before that block is exhausted, save
         * the residue and return.
         */
1494e1051a39Sopenharmony_ci# if defined(GHASH)
1495e1051a39Sopenharmony_ci        while (n && len) {
1496e1051a39Sopenharmony_ci            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
1497e1051a39Sopenharmony_ci            --len;
1498e1051a39Sopenharmony_ci            n = (n + 1) % 16;
1499e1051a39Sopenharmony_ci        }
1500e1051a39Sopenharmony_ci        if (n == 0) {
1501e1051a39Sopenharmony_ci            GHASH(ctx, ctx->Xn, mres);
1502e1051a39Sopenharmony_ci            mres = 0;
1503e1051a39Sopenharmony_ci        } else {
1504e1051a39Sopenharmony_ci            ctx->mres = mres;
1505e1051a39Sopenharmony_ci            return 0;
1506e1051a39Sopenharmony_ci        }
1507e1051a39Sopenharmony_ci# else
1508e1051a39Sopenharmony_ci        while (n && len) {
1509e1051a39Sopenharmony_ci            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1510e1051a39Sopenharmony_ci            --len;
1511e1051a39Sopenharmony_ci            n = (n + 1) % 16;
1512e1051a39Sopenharmony_ci        }
1513e1051a39Sopenharmony_ci        if (n == 0) {
1514e1051a39Sopenharmony_ci            GCM_MUL(ctx);
1515e1051a39Sopenharmony_ci            mres = 0;
1516e1051a39Sopenharmony_ci        } else {
1517e1051a39Sopenharmony_ci            ctx->mres = n;
1518e1051a39Sopenharmony_ci            return 0;
1519e1051a39Sopenharmony_ci        }
1520e1051a39Sopenharmony_ci# endif
1521e1051a39Sopenharmony_ci    }
1522e1051a39Sopenharmony_ci# if defined(GHASH)
        /*
         * Flush whatever is staged in Xn (for instance the Xi stashed above)
         * before hashing whole blocks directly from |out|.
         */
1523e1051a39Sopenharmony_ci        if (len >= 16 && mres) {
1524e1051a39Sopenharmony_ci            GHASH(ctx, ctx->Xn, mres);
1525e1051a39Sopenharmony_ci            mres = 0;
1526e1051a39Sopenharmony_ci        }
1527e1051a39Sopenharmony_ci#  if defined(GHASH_CHUNK)
    /* Bulk path: encrypt GHASH_CHUNK bytes, then hash the fresh output. */
1528e1051a39Sopenharmony_ci    while (len >= GHASH_CHUNK) {
1529e1051a39Sopenharmony_ci        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1530e1051a39Sopenharmony_ci        ctr += GHASH_CHUNK / 16;
1531e1051a39Sopenharmony_ci        if (IS_LITTLE_ENDIAN)
1532e1051a39Sopenharmony_ci#   ifdef BSWAP4
1533e1051a39Sopenharmony_ci            ctx->Yi.d[3] = BSWAP4(ctr);
1534e1051a39Sopenharmony_ci#   else
1535e1051a39Sopenharmony_ci            PUTU32(ctx->Yi.c + 12, ctr);
1536e1051a39Sopenharmony_ci#   endif
1537e1051a39Sopenharmony_ci        else
1538e1051a39Sopenharmony_ci            ctx->Yi.d[3] = ctr;
1539e1051a39Sopenharmony_ci        GHASH(ctx, out, GHASH_CHUNK);
1540e1051a39Sopenharmony_ci        out += GHASH_CHUNK;
1541e1051a39Sopenharmony_ci        in += GHASH_CHUNK;
1542e1051a39Sopenharmony_ci        len -= GHASH_CHUNK;
1543e1051a39Sopenharmony_ci    }
1544e1051a39Sopenharmony_ci#  endif
1545e1051a39Sopenharmony_ci# endif
    /* Remaining whole blocks: i = byte count (multiple of 16), j = blocks. */
1546e1051a39Sopenharmony_ci    if ((i = (len & (size_t)-16))) {
1547e1051a39Sopenharmony_ci        size_t j = i / 16;
1548e1051a39Sopenharmony_ci
1549e1051a39Sopenharmony_ci        (*stream) (in, out, j, key, ctx->Yi.c);
1550e1051a39Sopenharmony_ci        ctr += (unsigned int)j;
1551e1051a39Sopenharmony_ci        if (IS_LITTLE_ENDIAN)
1552e1051a39Sopenharmony_ci# ifdef BSWAP4
1553e1051a39Sopenharmony_ci            ctx->Yi.d[3] = BSWAP4(ctr);
1554e1051a39Sopenharmony_ci# else
1555e1051a39Sopenharmony_ci            PUTU32(ctx->Yi.c + 12, ctr);
1556e1051a39Sopenharmony_ci# endif
1557e1051a39Sopenharmony_ci        else
1558e1051a39Sopenharmony_ci            ctx->Yi.d[3] = ctr;
1559e1051a39Sopenharmony_ci        in += i;
1560e1051a39Sopenharmony_ci        len -= i;
1561e1051a39Sopenharmony_ci# if defined(GHASH)
1562e1051a39Sopenharmony_ci        GHASH(ctx, out, i);
1563e1051a39Sopenharmony_ci        out += i;
1564e1051a39Sopenharmony_ci# else
        /*
         * No GHASH macro: fold the output into Xi one block at a time.
         * i is reused as the byte index here; len was already reduced above.
         */
1565e1051a39Sopenharmony_ci        while (j--) {
1566e1051a39Sopenharmony_ci            for (i = 0; i < 16; ++i)
1567e1051a39Sopenharmony_ci                ctx->Xi.c[i] ^= out[i];
1568e1051a39Sopenharmony_ci            GCM_MUL(ctx);
1569e1051a39Sopenharmony_ci            out += 16;
1570e1051a39Sopenharmony_ci        }
1571e1051a39Sopenharmony_ci# endif
1572e1051a39Sopenharmony_ci    }
    /*
     * Fewer than 16 bytes remain and n is 0 here: generate one more keystream
     * block with ctx->block and use only its first |len| bytes. The output
     * bytes are staged (Xn) or XORed into Xi without a multiply; mres records
     * the residue for the next call.
     */
1573e1051a39Sopenharmony_ci    if (len) {
1574e1051a39Sopenharmony_ci        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1575e1051a39Sopenharmony_ci        ++ctr;
1576e1051a39Sopenharmony_ci        if (IS_LITTLE_ENDIAN)
1577e1051a39Sopenharmony_ci# ifdef BSWAP4
1578e1051a39Sopenharmony_ci            ctx->Yi.d[3] = BSWAP4(ctr);
1579e1051a39Sopenharmony_ci# else
1580e1051a39Sopenharmony_ci            PUTU32(ctx->Yi.c + 12, ctr);
1581e1051a39Sopenharmony_ci# endif
1582e1051a39Sopenharmony_ci        else
1583e1051a39Sopenharmony_ci            ctx->Yi.d[3] = ctr;
1584e1051a39Sopenharmony_ci        while (len--) {
1585e1051a39Sopenharmony_ci# if defined(GHASH)
1586e1051a39Sopenharmony_ci            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
1587e1051a39Sopenharmony_ci# else
1588e1051a39Sopenharmony_ci            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1589e1051a39Sopenharmony_ci# endif
1590e1051a39Sopenharmony_ci            ++n;
1591e1051a39Sopenharmony_ci        }
1592e1051a39Sopenharmony_ci    }
1593e1051a39Sopenharmony_ci
1594e1051a39Sopenharmony_ci    ctx->mres = mres;
1595e1051a39Sopenharmony_ci    return 0;
1596e1051a39Sopenharmony_ci#endif
1597e1051a39Sopenharmony_ci}
1598e1051a39Sopenharmony_ci
1599e1051a39Sopenharmony_ciint CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1600e1051a39Sopenharmony_ci                                const unsigned char *in, unsigned char *out,
1601e1051a39Sopenharmony_ci                                size_t len, ctr128_f stream)
1602e1051a39Sopenharmony_ci{
1603e1051a39Sopenharmony_ci#if defined(OPENSSL_SMALL_FOOTPRINT)
1604e1051a39Sopenharmony_ci    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
1605e1051a39Sopenharmony_ci#else
1606e1051a39Sopenharmony_ci    DECLARE_IS_ENDIAN;
1607e1051a39Sopenharmony_ci    unsigned int n, ctr, mres;
1608e1051a39Sopenharmony_ci    size_t i;
1609e1051a39Sopenharmony_ci    u64 mlen = ctx->len.u[1];
1610e1051a39Sopenharmony_ci    void *key = ctx->key;
1611e1051a39Sopenharmony_ci# ifdef GCM_FUNCREF_4BIT
1612e1051a39Sopenharmony_ci    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1613e1051a39Sopenharmony_ci#  ifdef GHASH
1614e1051a39Sopenharmony_ci    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1615e1051a39Sopenharmony_ci                         const u8 *inp, size_t len) = ctx->ghash;
1616e1051a39Sopenharmony_ci#  endif
1617e1051a39Sopenharmony_ci# endif
1618e1051a39Sopenharmony_ci
1619e1051a39Sopenharmony_ci    mlen += len;
1620e1051a39Sopenharmony_ci    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1621e1051a39Sopenharmony_ci        return -1;
1622e1051a39Sopenharmony_ci    ctx->len.u[1] = mlen;
1623e1051a39Sopenharmony_ci
1624e1051a39Sopenharmony_ci    mres = ctx->mres;
1625e1051a39Sopenharmony_ci
1626e1051a39Sopenharmony_ci    if (ctx->ares) {
1627e1051a39Sopenharmony_ci        /* First call to decrypt finalizes GHASH(AAD) */
1628e1051a39Sopenharmony_ci# if defined(GHASH)
1629e1051a39Sopenharmony_ci        if (len == 0) {
1630e1051a39Sopenharmony_ci            GCM_MUL(ctx);
1631e1051a39Sopenharmony_ci            ctx->ares = 0;
1632e1051a39Sopenharmony_ci            return 0;
1633e1051a39Sopenharmony_ci        }
1634e1051a39Sopenharmony_ci        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1635e1051a39Sopenharmony_ci        ctx->Xi.u[0] = 0;
1636e1051a39Sopenharmony_ci        ctx->Xi.u[1] = 0;
1637e1051a39Sopenharmony_ci        mres = sizeof(ctx->Xi);
1638e1051a39Sopenharmony_ci# else
1639e1051a39Sopenharmony_ci        GCM_MUL(ctx);
1640e1051a39Sopenharmony_ci# endif
1641e1051a39Sopenharmony_ci        ctx->ares = 0;
1642e1051a39Sopenharmony_ci    }
1643e1051a39Sopenharmony_ci
1644e1051a39Sopenharmony_ci    if (IS_LITTLE_ENDIAN)
1645e1051a39Sopenharmony_ci# ifdef BSWAP4
1646e1051a39Sopenharmony_ci        ctr = BSWAP4(ctx->Yi.d[3]);
1647e1051a39Sopenharmony_ci# else
1648e1051a39Sopenharmony_ci        ctr = GETU32(ctx->Yi.c + 12);
1649e1051a39Sopenharmony_ci# endif
1650e1051a39Sopenharmony_ci    else
1651e1051a39Sopenharmony_ci        ctr = ctx->Yi.d[3];
1652e1051a39Sopenharmony_ci
1653e1051a39Sopenharmony_ci    n = mres % 16;
1654e1051a39Sopenharmony_ci    if (n) {
1655e1051a39Sopenharmony_ci# if defined(GHASH)
1656e1051a39Sopenharmony_ci        while (n && len) {
1657e1051a39Sopenharmony_ci            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1658e1051a39Sopenharmony_ci            --len;
1659e1051a39Sopenharmony_ci            n = (n + 1) % 16;
1660e1051a39Sopenharmony_ci        }
1661e1051a39Sopenharmony_ci        if (n == 0) {
1662e1051a39Sopenharmony_ci            GHASH(ctx, ctx->Xn, mres);
1663e1051a39Sopenharmony_ci            mres = 0;
1664e1051a39Sopenharmony_ci        } else {
1665e1051a39Sopenharmony_ci            ctx->mres = mres;
1666e1051a39Sopenharmony_ci            return 0;
1667e1051a39Sopenharmony_ci        }
1668e1051a39Sopenharmony_ci# else
1669e1051a39Sopenharmony_ci        while (n && len) {
1670e1051a39Sopenharmony_ci            u8 c = *(in++);
1671e1051a39Sopenharmony_ci            *(out++) = c ^ ctx->EKi.c[n];
1672e1051a39Sopenharmony_ci            ctx->Xi.c[n] ^= c;
1673e1051a39Sopenharmony_ci            --len;
1674e1051a39Sopenharmony_ci            n = (n + 1) % 16;
1675e1051a39Sopenharmony_ci        }
1676e1051a39Sopenharmony_ci        if (n == 0) {
1677e1051a39Sopenharmony_ci            GCM_MUL(ctx);
1678e1051a39Sopenharmony_ci            mres = 0;
1679e1051a39Sopenharmony_ci        } else {
1680e1051a39Sopenharmony_ci            ctx->mres = n;
1681e1051a39Sopenharmony_ci            return 0;
1682e1051a39Sopenharmony_ci        }
1683e1051a39Sopenharmony_ci# endif
1684e1051a39Sopenharmony_ci    }
1685e1051a39Sopenharmony_ci# if defined(GHASH)
1686e1051a39Sopenharmony_ci    if (len >= 16 && mres) {
1687e1051a39Sopenharmony_ci        GHASH(ctx, ctx->Xn, mres);
1688e1051a39Sopenharmony_ci        mres = 0;
1689e1051a39Sopenharmony_ci    }
1690e1051a39Sopenharmony_ci#  if defined(GHASH_CHUNK)
1691e1051a39Sopenharmony_ci    while (len >= GHASH_CHUNK) {
1692e1051a39Sopenharmony_ci        GHASH(ctx, in, GHASH_CHUNK);
1693e1051a39Sopenharmony_ci        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1694e1051a39Sopenharmony_ci        ctr += GHASH_CHUNK / 16;
1695e1051a39Sopenharmony_ci        if (IS_LITTLE_ENDIAN)
1696e1051a39Sopenharmony_ci#   ifdef BSWAP4
1697e1051a39Sopenharmony_ci            ctx->Yi.d[3] = BSWAP4(ctr);
1698e1051a39Sopenharmony_ci#   else
1699e1051a39Sopenharmony_ci            PUTU32(ctx->Yi.c + 12, ctr);
1700e1051a39Sopenharmony_ci#   endif
1701e1051a39Sopenharmony_ci        else
1702e1051a39Sopenharmony_ci            ctx->Yi.d[3] = ctr;
1703e1051a39Sopenharmony_ci        out += GHASH_CHUNK;
1704e1051a39Sopenharmony_ci        in += GHASH_CHUNK;
1705e1051a39Sopenharmony_ci        len -= GHASH_CHUNK;
1706e1051a39Sopenharmony_ci    }
1707e1051a39Sopenharmony_ci#  endif
1708e1051a39Sopenharmony_ci# endif
1709e1051a39Sopenharmony_ci    if ((i = (len & (size_t)-16))) {
1710e1051a39Sopenharmony_ci        size_t j = i / 16;
1711e1051a39Sopenharmony_ci
1712e1051a39Sopenharmony_ci# if defined(GHASH)
1713e1051a39Sopenharmony_ci        GHASH(ctx, in, i);
1714e1051a39Sopenharmony_ci# else
1715e1051a39Sopenharmony_ci        while (j--) {
1716e1051a39Sopenharmony_ci            size_t k;
1717e1051a39Sopenharmony_ci            for (k = 0; k < 16; ++k)
1718e1051a39Sopenharmony_ci                ctx->Xi.c[k] ^= in[k];
1719e1051a39Sopenharmony_ci            GCM_MUL(ctx);
1720e1051a39Sopenharmony_ci            in += 16;
1721e1051a39Sopenharmony_ci        }
1722e1051a39Sopenharmony_ci        j = i / 16;
1723e1051a39Sopenharmony_ci        in -= i;
1724e1051a39Sopenharmony_ci# endif
1725e1051a39Sopenharmony_ci        (*stream) (in, out, j, key, ctx->Yi.c);
1726e1051a39Sopenharmony_ci        ctr += (unsigned int)j;
1727e1051a39Sopenharmony_ci        if (IS_LITTLE_ENDIAN)
1728e1051a39Sopenharmony_ci# ifdef BSWAP4
1729e1051a39Sopenharmony_ci            ctx->Yi.d[3] = BSWAP4(ctr);
1730e1051a39Sopenharmony_ci# else
1731e1051a39Sopenharmony_ci            PUTU32(ctx->Yi.c + 12, ctr);
1732e1051a39Sopenharmony_ci# endif
1733e1051a39Sopenharmony_ci        else
1734e1051a39Sopenharmony_ci            ctx->Yi.d[3] = ctr;
1735e1051a39Sopenharmony_ci        out += i;
1736e1051a39Sopenharmony_ci        in += i;
1737e1051a39Sopenharmony_ci        len -= i;
1738e1051a39Sopenharmony_ci    }
1739e1051a39Sopenharmony_ci    if (len) {
1740e1051a39Sopenharmony_ci        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1741e1051a39Sopenharmony_ci        ++ctr;
1742e1051a39Sopenharmony_ci        if (IS_LITTLE_ENDIAN)
1743e1051a39Sopenharmony_ci# ifdef BSWAP4
1744e1051a39Sopenharmony_ci            ctx->Yi.d[3] = BSWAP4(ctr);
1745e1051a39Sopenharmony_ci# else
1746e1051a39Sopenharmony_ci            PUTU32(ctx->Yi.c + 12, ctr);
1747e1051a39Sopenharmony_ci# endif
1748e1051a39Sopenharmony_ci        else
1749e1051a39Sopenharmony_ci            ctx->Yi.d[3] = ctr;
1750e1051a39Sopenharmony_ci        while (len--) {
1751e1051a39Sopenharmony_ci# if defined(GHASH)
1752e1051a39Sopenharmony_ci            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1753e1051a39Sopenharmony_ci# else
1754e1051a39Sopenharmony_ci            u8 c = in[n];
1755e1051a39Sopenharmony_ci            ctx->Xi.c[mres++] ^= c;
1756e1051a39Sopenharmony_ci            out[n] = c ^ ctx->EKi.c[n];
1757e1051a39Sopenharmony_ci# endif
1758e1051a39Sopenharmony_ci            ++n;
1759e1051a39Sopenharmony_ci        }
1760e1051a39Sopenharmony_ci    }
1761e1051a39Sopenharmony_ci
1762e1051a39Sopenharmony_ci    ctx->mres = mres;
1763e1051a39Sopenharmony_ci    return 0;
1764e1051a39Sopenharmony_ci#endif
1765e1051a39Sopenharmony_ci}
1766e1051a39Sopenharmony_ci
1767e1051a39Sopenharmony_ciint CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1768e1051a39Sopenharmony_ci                         size_t len)
1769e1051a39Sopenharmony_ci{
1770e1051a39Sopenharmony_ci    DECLARE_IS_ENDIAN;
1771e1051a39Sopenharmony_ci    u64 alen = ctx->len.u[0] << 3;
1772e1051a39Sopenharmony_ci    u64 clen = ctx->len.u[1] << 3;
1773e1051a39Sopenharmony_ci#ifdef GCM_FUNCREF_4BIT
1774e1051a39Sopenharmony_ci    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1775e1051a39Sopenharmony_ci# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1776e1051a39Sopenharmony_ci    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1777e1051a39Sopenharmony_ci                         const u8 *inp, size_t len) = ctx->ghash;
1778e1051a39Sopenharmony_ci# endif
1779e1051a39Sopenharmony_ci#endif
1780e1051a39Sopenharmony_ci
1781e1051a39Sopenharmony_ci#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1782e1051a39Sopenharmony_ci    u128 bitlen;
1783e1051a39Sopenharmony_ci    unsigned int mres = ctx->mres;
1784e1051a39Sopenharmony_ci
1785e1051a39Sopenharmony_ci    if (mres) {
1786e1051a39Sopenharmony_ci        unsigned blocks = (mres + 15) & -16;
1787e1051a39Sopenharmony_ci
1788e1051a39Sopenharmony_ci        memset(ctx->Xn + mres, 0, blocks - mres);
1789e1051a39Sopenharmony_ci        mres = blocks;
1790e1051a39Sopenharmony_ci        if (mres == sizeof(ctx->Xn)) {
1791e1051a39Sopenharmony_ci            GHASH(ctx, ctx->Xn, mres);
1792e1051a39Sopenharmony_ci            mres = 0;
1793e1051a39Sopenharmony_ci        }
1794e1051a39Sopenharmony_ci    } else if (ctx->ares) {
1795e1051a39Sopenharmony_ci        GCM_MUL(ctx);
1796e1051a39Sopenharmony_ci    }
1797e1051a39Sopenharmony_ci#else
1798e1051a39Sopenharmony_ci    if (ctx->mres || ctx->ares)
1799e1051a39Sopenharmony_ci        GCM_MUL(ctx);
1800e1051a39Sopenharmony_ci#endif
1801e1051a39Sopenharmony_ci
1802e1051a39Sopenharmony_ci    if (IS_LITTLE_ENDIAN) {
1803e1051a39Sopenharmony_ci#ifdef BSWAP8
1804e1051a39Sopenharmony_ci        alen = BSWAP8(alen);
1805e1051a39Sopenharmony_ci        clen = BSWAP8(clen);
1806e1051a39Sopenharmony_ci#else
1807e1051a39Sopenharmony_ci        u8 *p = ctx->len.c;
1808e1051a39Sopenharmony_ci
1809e1051a39Sopenharmony_ci        ctx->len.u[0] = alen;
1810e1051a39Sopenharmony_ci        ctx->len.u[1] = clen;
1811e1051a39Sopenharmony_ci
1812e1051a39Sopenharmony_ci        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1813e1051a39Sopenharmony_ci        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
1814e1051a39Sopenharmony_ci#endif
1815e1051a39Sopenharmony_ci    }
1816e1051a39Sopenharmony_ci
1817e1051a39Sopenharmony_ci#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1818e1051a39Sopenharmony_ci    bitlen.hi = alen;
1819e1051a39Sopenharmony_ci    bitlen.lo = clen;
1820e1051a39Sopenharmony_ci    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
1821e1051a39Sopenharmony_ci    mres += sizeof(bitlen);
1822e1051a39Sopenharmony_ci    GHASH(ctx, ctx->Xn, mres);
1823e1051a39Sopenharmony_ci#else
1824e1051a39Sopenharmony_ci    ctx->Xi.u[0] ^= alen;
1825e1051a39Sopenharmony_ci    ctx->Xi.u[1] ^= clen;
1826e1051a39Sopenharmony_ci    GCM_MUL(ctx);
1827e1051a39Sopenharmony_ci#endif
1828e1051a39Sopenharmony_ci
1829e1051a39Sopenharmony_ci    ctx->Xi.u[0] ^= ctx->EK0.u[0];
1830e1051a39Sopenharmony_ci    ctx->Xi.u[1] ^= ctx->EK0.u[1];
1831e1051a39Sopenharmony_ci
1832e1051a39Sopenharmony_ci    if (tag && len <= sizeof(ctx->Xi))
1833e1051a39Sopenharmony_ci        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
1834e1051a39Sopenharmony_ci    else
1835e1051a39Sopenharmony_ci        return -1;
1836e1051a39Sopenharmony_ci}
1837e1051a39Sopenharmony_ci
1838e1051a39Sopenharmony_civoid CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1839e1051a39Sopenharmony_ci{
1840e1051a39Sopenharmony_ci    CRYPTO_gcm128_finish(ctx, NULL, 0);
1841e1051a39Sopenharmony_ci    memcpy(tag, ctx->Xi.c,
1842e1051a39Sopenharmony_ci           len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
1843e1051a39Sopenharmony_ci}
1844e1051a39Sopenharmony_ci
1845e1051a39Sopenharmony_ciGCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1846e1051a39Sopenharmony_ci{
1847e1051a39Sopenharmony_ci    GCM128_CONTEXT *ret;
1848e1051a39Sopenharmony_ci
1849e1051a39Sopenharmony_ci    if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
1850e1051a39Sopenharmony_ci        CRYPTO_gcm128_init(ret, key, block);
1851e1051a39Sopenharmony_ci
1852e1051a39Sopenharmony_ci    return ret;
1853e1051a39Sopenharmony_ci}
1854e1051a39Sopenharmony_ci
1855e1051a39Sopenharmony_civoid CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1856e1051a39Sopenharmony_ci{
1857e1051a39Sopenharmony_ci    OPENSSL_clear_free(ctx, sizeof(*ctx));
1858e1051a39Sopenharmony_ci}
1859