/* xref: /third_party/openssl/crypto/sha/keccak1600.c (revision e1051a39) */
/*
 * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

#include <openssl/e_os2.h>
#include <string.h>
#include <assert.h>

/*
 * Prototypes of the two sponge entry points that operate on the 5x5 array
 * of 64-bit lanes.  Only the declarations appear at this point of the file.
 * NOTE(review): |r| is presumably the rate (block size) in bytes and the
 * value returned by SHA3_absorb presumably the number of input bytes left
 * unprocessed -- confirm against the definitions.
 */
size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len,
                   size_t r);
void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r);

#if !defined(KECCAK1600_ASM) || !defined(SELFTEST)

/*
 * Choose some sensible defaults.
 */
#if !defined(KECCAK_REF) && !defined(KECCAK_1X) && !defined(KECCAK_1X_ALT) && \
    !defined(KECCAK_2X) && !defined(KECCAK_INPLACE)
# define KECCAK_2X      /* default to KECCAK_2X variant */
#endif

#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
    (defined(__x86_64) && !defined(__BMI__)) || defined(_M_X64) || \
    defined(__mips) || defined(__riscv) || defined(__s390__) || \
    defined(__EMSCRIPTEN__)
/*
 * These don't have "and with complement" instruction, so minimize amount
 * of "not"-s. Implemented only in the [default] KECCAK_2X variant.
 */
# define KECCAK_COMPLEMENTING_TRANSFORM
#endif

#if defined(__x86_64__) || defined(__aarch64__) || \
    defined(__mips64) || defined(__ia64) || \
    (defined(__VMS) && !defined(__vax))
/*
 * These are available even in ILP32 flavours, but even then they are
 * capable of performing 64-bit operations as efficiently as in *P64.
 * Since it's not given that we can use sizeof(void *), just shunt it.
 */
# define BIT_INTERLEAVE (0)
#else
/* Everywhere else, interleave whenever pointers are narrower than 64 bits. */
# define BIT_INTERLEAVE (sizeof(void *) < 8)
#endif

/*
 * Rotate the 32-bit value |a| left by |offset| bits.  The "& 31" turns the
 * right-shift count into 0 when |offset| is 0, so a zero rotation is safe.
 * Both arguments are expanded more than once, so they must be free of side
 * effects, and |a| must be an unsigned 32-bit quantity.
 */
#define ROL32(a, offset) (((a) << (offset)) | ((a) >> ((32 - (offset)) & 31)))

54e1051a39Sopenharmony_cistatic uint64_t ROL64(uint64_t val, int offset)
55e1051a39Sopenharmony_ci{
56e1051a39Sopenharmony_ci    if (offset == 0) {
57e1051a39Sopenharmony_ci        return val;
58e1051a39Sopenharmony_ci    } else if (!BIT_INTERLEAVE) {
59e1051a39Sopenharmony_ci        return (val << offset) | (val >> (64-offset));
60e1051a39Sopenharmony_ci    } else {
61e1051a39Sopenharmony_ci        uint32_t hi = (uint32_t)(val >> 32), lo = (uint32_t)val;
62e1051a39Sopenharmony_ci
63e1051a39Sopenharmony_ci        if (offset & 1) {
64e1051a39Sopenharmony_ci            uint32_t tmp = hi;
65e1051a39Sopenharmony_ci
66e1051a39Sopenharmony_ci            offset >>= 1;
67e1051a39Sopenharmony_ci            hi = ROL32(lo, offset);
68e1051a39Sopenharmony_ci            lo = ROL32(tmp, offset + 1);
69e1051a39Sopenharmony_ci        } else {
70e1051a39Sopenharmony_ci            offset >>= 1;
71e1051a39Sopenharmony_ci            lo = ROL32(lo, offset);
72e1051a39Sopenharmony_ci            hi = ROL32(hi, offset);
73e1051a39Sopenharmony_ci        }
74e1051a39Sopenharmony_ci
75e1051a39Sopenharmony_ci        return ((uint64_t)hi << 32) | lo;
76e1051a39Sopenharmony_ci    }
77e1051a39Sopenharmony_ci}
78e1051a39Sopenharmony_ci
/*
 * Per-lane left-rotation amounts, indexed as rhotates[y][x] to match the
 * A[y][x] layout of the state.  Lane [0][0] is not rotated.
 */
static const unsigned char rhotates[5][5] = {
    {  0,  1, 62, 28, 27 },
    { 36, 44,  6, 55, 20 },
    {  3, 10, 43, 25, 39 },
    { 41, 45, 15, 21,  8 },
    { 18,  2, 61, 56, 14 }
};

87e1051a39Sopenharmony_cistatic const uint64_t iotas[] = {
88e1051a39Sopenharmony_ci    BIT_INTERLEAVE ? 0x0000000000000001ULL : 0x0000000000000001ULL,
89e1051a39Sopenharmony_ci    BIT_INTERLEAVE ? 0x0000008900000000ULL : 0x0000000000008082ULL,
90e1051a39Sopenharmony_ci    BIT_INTERLEAVE ? 0x8000008b00000000ULL : 0x800000000000808aULL,
91e1051a39Sopenharmony_ci    BIT_INTERLEAVE ? 0x8000808000000000ULL : 0x8000000080008000ULL,
92e1051a39Sopenharmony_ci    BIT_INTERLEAVE ? 0x0000008b00000001ULL : 0x000000000000808bULL,
93e1051a39Sopenharmony_ci    BIT_INTERLEAVE ? 0x0000800000000001ULL : 0x0000000080000001ULL,
94e1051a39Sopenharmony_ci    BIT_INTERLEAVE ? 0x8000808800000001ULL : 0x8000000080008081ULL,
95e1051a39Sopenharmony_ci    BIT_INTERLEAVE ? 0x8000008200000001ULL : 0x8000000000008009ULL,
96e1051a39Sopenharmony_ci    BIT_INTERLEAVE ? 0x0000000b00000000ULL : 0x000000000000008aULL,
97e1051a39Sopenharmony_ci    BIT_INTERLEAVE ? 0x0000000a00000000ULL : 0x0000000000000088ULL,
98e1051a39Sopenharmony_ci    BIT_INTERLEAVE ? 0x0000808200000001ULL : 0x0000000080008009ULL,
99e1051a39Sopenharmony_ci    BIT_INTERLEAVE ? 0x0000800300000000ULL : 0x000000008000000aULL,
100e1051a39Sopenharmony_ci    BIT_INTERLEAVE ? 0x0000808b00000001ULL : 0x000000008000808bULL,
101e1051a39Sopenharmony_ci    BIT_INTERLEAVE ? 0x8000000b00000001ULL : 0x800000000000008bULL,
102e1051a39Sopenharmony_ci    BIT_INTERLEAVE ? 0x8000008a00000001ULL : 0x8000000000008089ULL,
103e1051a39Sopenharmony_ci    BIT_INTERLEAVE ? 0x8000008100000001ULL : 0x8000000000008003ULL,
104e1051a39Sopenharmony_ci    BIT_INTERLEAVE ? 0x8000008100000000ULL : 0x8000000000008002ULL,
105e1051a39Sopenharmony_ci    BIT_INTERLEAVE ? 0x8000000800000000ULL : 0x8000000000000080ULL,
106e1051a39Sopenharmony_ci    BIT_INTERLEAVE ? 0x0000008300000000ULL : 0x000000000000800aULL,
107e1051a39Sopenharmony_ci    BIT_INTERLEAVE ? 0x8000800300000000ULL : 0x800000008000000aULL,
108e1051a39Sopenharmony_ci    BIT_INTERLEAVE ? 0x8000808800000001ULL : 0x8000000080008081ULL,
109e1051a39Sopenharmony_ci    BIT_INTERLEAVE ? 0x8000008800000000ULL : 0x8000000000008080ULL,
110e1051a39Sopenharmony_ci    BIT_INTERLEAVE ? 0x0000800000000001ULL : 0x0000000080000001ULL,
111e1051a39Sopenharmony_ci    BIT_INTERLEAVE ? 0x8000808200000000ULL : 0x8000000080008008ULL
112e1051a39Sopenharmony_ci};
113e1051a39Sopenharmony_ci
#if defined(KECCAK_REF)
/*
 * This is a straightforward or "maximum clarity" implementation aiming
 * to resemble section 3.2 of the FIPS PUB 202 "SHA-3 Standard:
 * Permutation-Based Hash and Extendible-Output Functions" as much as
 * possible, with one caveat: because of the way C stores matrices,
 * references to A[x,y] in the specification are presented as A[y][x].
 * The implementation unrolls inner x-loops so that modulo 5 operations
 * are explicitly pre-computed.
 */
/*
 * Theta step, in place on |A|.
 *
 * C[x] is the XOR of the five lanes in column x.  D[x] combines the column
 * to the right, rotated left by one bit, with the column to the left
 * (indices taken modulo 5).  D[x] is then XORed into every lane of column x.
 */
static void Theta(uint64_t A[5][5])
{
    uint64_t C[5], D[5];
    size_t y;

    /* Start the column parities from row 0, then fold in rows 1..4. */
    C[0] = A[0][0];
    C[1] = A[0][1];
    C[2] = A[0][2];
    C[3] = A[0][3];
    C[4] = A[0][4];

    for (y = 1; y < 5; y++) {
        C[0] ^= A[y][0];
        C[1] ^= A[y][1];
        C[2] ^= A[y][2];
        C[3] ^= A[y][3];
        C[4] ^= A[y][4];
    }

    /* D[x] = ROL64(C[(x + 1) % 5], 1) ^ C[(x + 4) % 5] */
    D[0] = ROL64(C[1], 1) ^ C[4];
    D[1] = ROL64(C[2], 1) ^ C[0];
    D[2] = ROL64(C[3], 1) ^ C[1];
    D[3] = ROL64(C[4], 1) ^ C[2];
    D[4] = ROL64(C[0], 1) ^ C[3];

    for (y = 0; y < 5; y++) {
        A[y][0] ^= D[0];
        A[y][1] ^= D[1];
        A[y][2] ^= D[2];
        A[y][3] ^= D[3];
        A[y][4] ^= D[4];
    }
}

158e1051a39Sopenharmony_cistatic void Rho(uint64_t A[5][5])
159e1051a39Sopenharmony_ci{
160e1051a39Sopenharmony_ci    size_t y;
161e1051a39Sopenharmony_ci
162e1051a39Sopenharmony_ci    for (y = 0; y < 5; y++) {
163e1051a39Sopenharmony_ci        A[y][0] = ROL64(A[y][0], rhotates[y][0]);
164e1051a39Sopenharmony_ci        A[y][1] = ROL64(A[y][1], rhotates[y][1]);
165e1051a39Sopenharmony_ci        A[y][2] = ROL64(A[y][2], rhotates[y][2]);
166e1051a39Sopenharmony_ci        A[y][3] = ROL64(A[y][3], rhotates[y][3]);
167e1051a39Sopenharmony_ci        A[y][4] = ROL64(A[y][4], rhotates[y][4]);
168e1051a39Sopenharmony_ci    }
169e1051a39Sopenharmony_ci}
170e1051a39Sopenharmony_ci
/*
 * Pi step, in place on |A|: permute the 25 lanes without changing their
 * contents.  A snapshot T of the state is taken first so that every source
 * lane is read before it can be overwritten.
 */
static void Pi(uint64_t A[5][5])
{
    uint64_t T[5][5];

    /*
     * T = A
     * A[y][x] = T[x][(3*y+x)%5]
     */
    memcpy(T, A, sizeof(T));

    A[0][0] = T[0][0];
    A[0][1] = T[1][1];
    A[0][2] = T[2][2];
    A[0][3] = T[3][3];
    A[0][4] = T[4][4];

    A[1][0] = T[0][3];
    A[1][1] = T[1][4];
    A[1][2] = T[2][0];
    A[1][3] = T[3][1];
    A[1][4] = T[4][2];

    A[2][0] = T[0][1];
    A[2][1] = T[1][2];
    A[2][2] = T[2][3];
    A[2][3] = T[3][4];
    A[2][4] = T[4][0];

    A[3][0] = T[0][4];
    A[3][1] = T[1][0];
    A[3][2] = T[2][1];
    A[3][3] = T[3][2];
    A[3][4] = T[4][3];

    A[4][0] = T[0][2];
    A[4][1] = T[1][3];
    A[4][2] = T[2][4];
    A[4][3] = T[3][0];
    A[4][4] = T[4][1];
}

/*
 * Chi step, in place on |A|: within each row, every lane is XORed with the
 * AND of the complement of its right neighbour and the lane two positions
 * to the right (indices modulo 5).  The new row is built in C[] first so
 * that all five results are computed from the unmodified row.
 */
static void Chi(uint64_t A[5][5])
{
    uint64_t C[5];
    size_t y;

    for (y = 0; y < 5; y++) {
        C[0] = A[y][0] ^ (~A[y][1] & A[y][2]);
        C[1] = A[y][1] ^ (~A[y][2] & A[y][3]);
        C[2] = A[y][2] ^ (~A[y][3] & A[y][4]);
        C[3] = A[y][3] ^ (~A[y][4] & A[y][0]);
        C[4] = A[y][4] ^ (~A[y][0] & A[y][1]);

        A[y][0] = C[0];
        A[y][1] = C[1];
        A[y][2] = C[2];
        A[y][3] = C[3];
        A[y][4] = C[4];
    }
}

232e1051a39Sopenharmony_cistatic void Iota(uint64_t A[5][5], size_t i)
233e1051a39Sopenharmony_ci{
234e1051a39Sopenharmony_ci    assert(i < (sizeof(iotas) / sizeof(iotas[0])));
235e1051a39Sopenharmony_ci    A[0][0] ^= iotas[i];
236e1051a39Sopenharmony_ci}
237e1051a39Sopenharmony_ci
/*
 * Reference permutation: run 24 rounds in place on |A|, each round being
 * Theta, Rho, Pi, Chi and Iota applied in that order.
 */
static void KeccakF1600(uint64_t A[5][5])
{
    size_t i;

    for (i = 0; i < 24; i++) {
        Theta(A);
        Rho(A);
        Pi(A);
        Chi(A);
        Iota(A, i);
    }
}

#elif defined(KECCAK_1X)
/*
 * This implementation is an optimization of the above code featuring unroll
 * of even y-loops, their fusion and code motion. It also minimizes
 * temporary storage. A compiler would normally do all these things for
 * you; the purpose of manual optimization is to provide an "unobscured"
 * reference for an assembly implementation [in case this approach is
 * chosen for implementation on some platform]. In a nutshell it's the
 * equivalent of the "plane-per-plane processing" approach discussed in
 * section 2.4 of "Keccak implementation overview".
 */
262e1051a39Sopenharmony_cistatic void Round(uint64_t A[5][5], size_t i)
263e1051a39Sopenharmony_ci{
264e1051a39Sopenharmony_ci    uint64_t C[5], E[2];        /* registers */
265e1051a39Sopenharmony_ci    uint64_t D[5], T[2][5];     /* memory    */
266e1051a39Sopenharmony_ci
267e1051a39Sopenharmony_ci    assert(i < (sizeof(iotas) / sizeof(iotas[0])));
268e1051a39Sopenharmony_ci
269e1051a39Sopenharmony_ci    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
270e1051a39Sopenharmony_ci    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
271e1051a39Sopenharmony_ci    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
272e1051a39Sopenharmony_ci    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
273e1051a39Sopenharmony_ci    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
274e1051a39Sopenharmony_ci
275e1051a39Sopenharmony_ci#if defined(__arm__)
276e1051a39Sopenharmony_ci    D[1] = E[0] = ROL64(C[2], 1) ^ C[0];
277e1051a39Sopenharmony_ci    D[4] = E[1] = ROL64(C[0], 1) ^ C[3];
278e1051a39Sopenharmony_ci    D[0] = C[0] = ROL64(C[1], 1) ^ C[4];
279e1051a39Sopenharmony_ci    D[2] = C[1] = ROL64(C[3], 1) ^ C[1];
280e1051a39Sopenharmony_ci    D[3] = C[2] = ROL64(C[4], 1) ^ C[2];
281e1051a39Sopenharmony_ci
282e1051a39Sopenharmony_ci    T[0][0] = A[3][0] ^ C[0]; /* borrow T[0][0] */
283e1051a39Sopenharmony_ci    T[0][1] = A[0][1] ^ E[0]; /* D[1] */
284e1051a39Sopenharmony_ci    T[0][2] = A[0][2] ^ C[1]; /* D[2] */
285e1051a39Sopenharmony_ci    T[0][3] = A[0][3] ^ C[2]; /* D[3] */
286e1051a39Sopenharmony_ci    T[0][4] = A[0][4] ^ E[1]; /* D[4] */
287e1051a39Sopenharmony_ci
288e1051a39Sopenharmony_ci    C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]);   /* D[3] */
289e1051a39Sopenharmony_ci    C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]);   /* D[4] */
290e1051a39Sopenharmony_ci    C[0] =       A[0][0] ^ C[0]; /* rotate by 0 */  /* D[0] */
291e1051a39Sopenharmony_ci    C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]);   /* D[2] */
292e1051a39Sopenharmony_ci    C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]);   /* D[1] */
293e1051a39Sopenharmony_ci#else
294e1051a39Sopenharmony_ci    D[0] = ROL64(C[1], 1) ^ C[4];
295e1051a39Sopenharmony_ci    D[1] = ROL64(C[2], 1) ^ C[0];
296e1051a39Sopenharmony_ci    D[2] = ROL64(C[3], 1) ^ C[1];
297e1051a39Sopenharmony_ci    D[3] = ROL64(C[4], 1) ^ C[2];
298e1051a39Sopenharmony_ci    D[4] = ROL64(C[0], 1) ^ C[3];
299e1051a39Sopenharmony_ci
300e1051a39Sopenharmony_ci    T[0][0] = A[3][0] ^ D[0]; /* borrow T[0][0] */
301e1051a39Sopenharmony_ci    T[0][1] = A[0][1] ^ D[1];
302e1051a39Sopenharmony_ci    T[0][2] = A[0][2] ^ D[2];
303e1051a39Sopenharmony_ci    T[0][3] = A[0][3] ^ D[3];
304e1051a39Sopenharmony_ci    T[0][4] = A[0][4] ^ D[4];
305e1051a39Sopenharmony_ci
306e1051a39Sopenharmony_ci    C[0] =       A[0][0] ^ D[0]; /* rotate by 0 */
307e1051a39Sopenharmony_ci    C[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
308e1051a39Sopenharmony_ci    C[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
309e1051a39Sopenharmony_ci    C[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
310e1051a39Sopenharmony_ci    C[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
311e1051a39Sopenharmony_ci#endif
312e1051a39Sopenharmony_ci    A[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
313e1051a39Sopenharmony_ci    A[0][1] = C[1] ^ (~C[2] & C[3]);
314e1051a39Sopenharmony_ci    A[0][2] = C[2] ^ (~C[3] & C[4]);
315e1051a39Sopenharmony_ci    A[0][3] = C[3] ^ (~C[4] & C[0]);
316e1051a39Sopenharmony_ci    A[0][4] = C[4] ^ (~C[0] & C[1]);
317e1051a39Sopenharmony_ci
318e1051a39Sopenharmony_ci    T[1][0] = A[1][0] ^ (C[3] = D[0]);
319e1051a39Sopenharmony_ci    T[1][1] = A[2][1] ^ (C[4] = D[1]); /* borrow T[1][1] */
320e1051a39Sopenharmony_ci    T[1][2] = A[1][2] ^ (E[0] = D[2]);
321e1051a39Sopenharmony_ci    T[1][3] = A[1][3] ^ (E[1] = D[3]);
322e1051a39Sopenharmony_ci    T[1][4] = A[2][4] ^ (C[2] = D[4]); /* borrow T[1][4] */
323e1051a39Sopenharmony_ci
324e1051a39Sopenharmony_ci    C[0] = ROL64(T[0][3],        rhotates[0][3]);
325e1051a39Sopenharmony_ci    C[1] = ROL64(A[1][4] ^ C[2], rhotates[1][4]);   /* D[4] */
326e1051a39Sopenharmony_ci    C[2] = ROL64(A[2][0] ^ C[3], rhotates[2][0]);   /* D[0] */
327e1051a39Sopenharmony_ci    C[3] = ROL64(A[3][1] ^ C[4], rhotates[3][1]);   /* D[1] */
328e1051a39Sopenharmony_ci    C[4] = ROL64(A[4][2] ^ E[0], rhotates[4][2]);   /* D[2] */
329e1051a39Sopenharmony_ci
330e1051a39Sopenharmony_ci    A[1][0] = C[0] ^ (~C[1] & C[2]);
331e1051a39Sopenharmony_ci    A[1][1] = C[1] ^ (~C[2] & C[3]);
332e1051a39Sopenharmony_ci    A[1][2] = C[2] ^ (~C[3] & C[4]);
333e1051a39Sopenharmony_ci    A[1][3] = C[3] ^ (~C[4] & C[0]);
334e1051a39Sopenharmony_ci    A[1][4] = C[4] ^ (~C[0] & C[1]);
335e1051a39Sopenharmony_ci
336e1051a39Sopenharmony_ci    C[0] = ROL64(T[0][1],        rhotates[0][1]);
337e1051a39Sopenharmony_ci    C[1] = ROL64(T[1][2],        rhotates[1][2]);
338e1051a39Sopenharmony_ci    C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
339e1051a39Sopenharmony_ci    C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
340e1051a39Sopenharmony_ci    C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
341e1051a39Sopenharmony_ci
342e1051a39Sopenharmony_ci    A[2][0] = C[0] ^ (~C[1] & C[2]);
343e1051a39Sopenharmony_ci    A[2][1] = C[1] ^ (~C[2] & C[3]);
344e1051a39Sopenharmony_ci    A[2][2] = C[2] ^ (~C[3] & C[4]);
345e1051a39Sopenharmony_ci    A[2][3] = C[3] ^ (~C[4] & C[0]);
346e1051a39Sopenharmony_ci    A[2][4] = C[4] ^ (~C[0] & C[1]);
347e1051a39Sopenharmony_ci
348e1051a39Sopenharmony_ci    C[0] = ROL64(T[0][4],        rhotates[0][4]);
349e1051a39Sopenharmony_ci    C[1] = ROL64(T[1][0],        rhotates[1][0]);
350e1051a39Sopenharmony_ci    C[2] = ROL64(T[1][1],        rhotates[2][1]); /* originally A[2][1] */
351e1051a39Sopenharmony_ci    C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
352e1051a39Sopenharmony_ci    C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
353e1051a39Sopenharmony_ci
354e1051a39Sopenharmony_ci    A[3][0] = C[0] ^ (~C[1] & C[2]);
355e1051a39Sopenharmony_ci    A[3][1] = C[1] ^ (~C[2] & C[3]);
356e1051a39Sopenharmony_ci    A[3][2] = C[2] ^ (~C[3] & C[4]);
357e1051a39Sopenharmony_ci    A[3][3] = C[3] ^ (~C[4] & C[0]);
358e1051a39Sopenharmony_ci    A[3][4] = C[4] ^ (~C[0] & C[1]);
359e1051a39Sopenharmony_ci
360e1051a39Sopenharmony_ci    C[0] = ROL64(T[0][2],        rhotates[0][2]);
361e1051a39Sopenharmony_ci    C[1] = ROL64(T[1][3],        rhotates[1][3]);
362e1051a39Sopenharmony_ci    C[2] = ROL64(T[1][4],        rhotates[2][4]); /* originally A[2][4] */
363e1051a39Sopenharmony_ci    C[3] = ROL64(T[0][0],        rhotates[3][0]); /* originally A[3][0] */
364e1051a39Sopenharmony_ci    C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
365e1051a39Sopenharmony_ci
366e1051a39Sopenharmony_ci    A[4][0] = C[0] ^ (~C[1] & C[2]);
367e1051a39Sopenharmony_ci    A[4][1] = C[1] ^ (~C[2] & C[3]);
368e1051a39Sopenharmony_ci    A[4][2] = C[2] ^ (~C[3] & C[4]);
369e1051a39Sopenharmony_ci    A[4][3] = C[3] ^ (~C[4] & C[0]);
370e1051a39Sopenharmony_ci    A[4][4] = C[4] ^ (~C[0] & C[1]);
371e1051a39Sopenharmony_ci}
372e1051a39Sopenharmony_ci
/*
 * KECCAK_1X permutation: apply Round() to |A| in place for round numbers
 * 0 through 23.
 */
static void KeccakF1600(uint64_t A[5][5])
{
    size_t i;

    for (i = 0; i < 24; i++) {
        Round(A, i);
    }
}

#elif defined(KECCAK_1X_ALT)
/*
 * This is a variant of the above KECCAK_1X that reduces the requirement for
 * temporary storage even further, but at the cost of more updates to A[][].
 * It's less suitable if A[][] is memory bound, but better if it's
 * register bound.
 */

390e1051a39Sopenharmony_cistatic void Round(uint64_t A[5][5], size_t i)
391e1051a39Sopenharmony_ci{
392e1051a39Sopenharmony_ci    uint64_t C[5], D[5];
393e1051a39Sopenharmony_ci
394e1051a39Sopenharmony_ci    assert(i < (sizeof(iotas) / sizeof(iotas[0])));
395e1051a39Sopenharmony_ci
396e1051a39Sopenharmony_ci    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
397e1051a39Sopenharmony_ci    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
398e1051a39Sopenharmony_ci    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
399e1051a39Sopenharmony_ci    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
400e1051a39Sopenharmony_ci    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
401e1051a39Sopenharmony_ci
402e1051a39Sopenharmony_ci    D[1] = C[0] ^  ROL64(C[2], 1);
403e1051a39Sopenharmony_ci    D[2] = C[1] ^  ROL64(C[3], 1);
404e1051a39Sopenharmony_ci    D[3] = C[2] ^= ROL64(C[4], 1);
405e1051a39Sopenharmony_ci    D[4] = C[3] ^= ROL64(C[0], 1);
406e1051a39Sopenharmony_ci    D[0] = C[4] ^= ROL64(C[1], 1);
407e1051a39Sopenharmony_ci
408e1051a39Sopenharmony_ci    A[0][1] ^= D[1];
409e1051a39Sopenharmony_ci    A[1][1] ^= D[1];
410e1051a39Sopenharmony_ci    A[2][1] ^= D[1];
411e1051a39Sopenharmony_ci    A[3][1] ^= D[1];
412e1051a39Sopenharmony_ci    A[4][1] ^= D[1];
413e1051a39Sopenharmony_ci
414e1051a39Sopenharmony_ci    A[0][2] ^= D[2];
415e1051a39Sopenharmony_ci    A[1][2] ^= D[2];
416e1051a39Sopenharmony_ci    A[2][2] ^= D[2];
417e1051a39Sopenharmony_ci    A[3][2] ^= D[2];
418e1051a39Sopenharmony_ci    A[4][2] ^= D[2];
419e1051a39Sopenharmony_ci
420e1051a39Sopenharmony_ci    A[0][3] ^= C[2];
421e1051a39Sopenharmony_ci    A[1][3] ^= C[2];
422e1051a39Sopenharmony_ci    A[2][3] ^= C[2];
423e1051a39Sopenharmony_ci    A[3][3] ^= C[2];
424e1051a39Sopenharmony_ci    A[4][3] ^= C[2];
425e1051a39Sopenharmony_ci
426e1051a39Sopenharmony_ci    A[0][4] ^= C[3];
427e1051a39Sopenharmony_ci    A[1][4] ^= C[3];
428e1051a39Sopenharmony_ci    A[2][4] ^= C[3];
429e1051a39Sopenharmony_ci    A[3][4] ^= C[3];
430e1051a39Sopenharmony_ci    A[4][4] ^= C[3];
431e1051a39Sopenharmony_ci
432e1051a39Sopenharmony_ci    A[0][0] ^= C[4];
433e1051a39Sopenharmony_ci    A[1][0] ^= C[4];
434e1051a39Sopenharmony_ci    A[2][0] ^= C[4];
435e1051a39Sopenharmony_ci    A[3][0] ^= C[4];
436e1051a39Sopenharmony_ci    A[4][0] ^= C[4];
437e1051a39Sopenharmony_ci
438e1051a39Sopenharmony_ci    C[1] = A[0][1];
439e1051a39Sopenharmony_ci    C[2] = A[0][2];
440e1051a39Sopenharmony_ci    C[3] = A[0][3];
441e1051a39Sopenharmony_ci    C[4] = A[0][4];
442e1051a39Sopenharmony_ci
443e1051a39Sopenharmony_ci    A[0][1] = ROL64(A[1][1], rhotates[1][1]);
444e1051a39Sopenharmony_ci    A[0][2] = ROL64(A[2][2], rhotates[2][2]);
445e1051a39Sopenharmony_ci    A[0][3] = ROL64(A[3][3], rhotates[3][3]);
446e1051a39Sopenharmony_ci    A[0][4] = ROL64(A[4][4], rhotates[4][4]);
447e1051a39Sopenharmony_ci
448e1051a39Sopenharmony_ci    A[1][1] = ROL64(A[1][4], rhotates[1][4]);
449e1051a39Sopenharmony_ci    A[2][2] = ROL64(A[2][3], rhotates[2][3]);
450e1051a39Sopenharmony_ci    A[3][3] = ROL64(A[3][2], rhotates[3][2]);
451e1051a39Sopenharmony_ci    A[4][4] = ROL64(A[4][1], rhotates[4][1]);
452e1051a39Sopenharmony_ci
453e1051a39Sopenharmony_ci    A[1][4] = ROL64(A[4][2], rhotates[4][2]);
454e1051a39Sopenharmony_ci    A[2][3] = ROL64(A[3][4], rhotates[3][4]);
455e1051a39Sopenharmony_ci    A[3][2] = ROL64(A[2][1], rhotates[2][1]);
456e1051a39Sopenharmony_ci    A[4][1] = ROL64(A[1][3], rhotates[1][3]);
457e1051a39Sopenharmony_ci
458e1051a39Sopenharmony_ci    A[4][2] = ROL64(A[2][4], rhotates[2][4]);
459e1051a39Sopenharmony_ci    A[3][4] = ROL64(A[4][3], rhotates[4][3]);
460e1051a39Sopenharmony_ci    A[2][1] = ROL64(A[1][2], rhotates[1][2]);
461e1051a39Sopenharmony_ci    A[1][3] = ROL64(A[3][1], rhotates[3][1]);
462e1051a39Sopenharmony_ci
463e1051a39Sopenharmony_ci    A[2][4] = ROL64(A[4][0], rhotates[4][0]);
464e1051a39Sopenharmony_ci    A[4][3] = ROL64(A[3][0], rhotates[3][0]);
465e1051a39Sopenharmony_ci    A[1][2] = ROL64(A[2][0], rhotates[2][0]);
466e1051a39Sopenharmony_ci    A[3][1] = ROL64(A[1][0], rhotates[1][0]);
467e1051a39Sopenharmony_ci
468e1051a39Sopenharmony_ci    A[1][0] = ROL64(C[3],    rhotates[0][3]);
469e1051a39Sopenharmony_ci    A[2][0] = ROL64(C[1],    rhotates[0][1]);
470e1051a39Sopenharmony_ci    A[3][0] = ROL64(C[4],    rhotates[0][4]);
471e1051a39Sopenharmony_ci    A[4][0] = ROL64(C[2],    rhotates[0][2]);
472e1051a39Sopenharmony_ci
473e1051a39Sopenharmony_ci    C[0] = A[0][0];
474e1051a39Sopenharmony_ci    C[1] = A[1][0];
475e1051a39Sopenharmony_ci    D[0] = A[0][1];
476e1051a39Sopenharmony_ci    D[1] = A[1][1];
477e1051a39Sopenharmony_ci
478e1051a39Sopenharmony_ci    A[0][0] ^= (~A[0][1] & A[0][2]);
479e1051a39Sopenharmony_ci    A[1][0] ^= (~A[1][1] & A[1][2]);
480e1051a39Sopenharmony_ci    A[0][1] ^= (~A[0][2] & A[0][3]);
481e1051a39Sopenharmony_ci    A[1][1] ^= (~A[1][2] & A[1][3]);
482e1051a39Sopenharmony_ci    A[0][2] ^= (~A[0][3] & A[0][4]);
483e1051a39Sopenharmony_ci    A[1][2] ^= (~A[1][3] & A[1][4]);
484e1051a39Sopenharmony_ci    A[0][3] ^= (~A[0][4] & C[0]);
485e1051a39Sopenharmony_ci    A[1][3] ^= (~A[1][4] & C[1]);
486e1051a39Sopenharmony_ci    A[0][4] ^= (~C[0]    & D[0]);
487e1051a39Sopenharmony_ci    A[1][4] ^= (~C[1]    & D[1]);
488e1051a39Sopenharmony_ci
489e1051a39Sopenharmony_ci    C[2] = A[2][0];
490e1051a39Sopenharmony_ci    C[3] = A[3][0];
491e1051a39Sopenharmony_ci    D[2] = A[2][1];
492e1051a39Sopenharmony_ci    D[3] = A[3][1];
493e1051a39Sopenharmony_ci
494e1051a39Sopenharmony_ci    A[2][0] ^= (~A[2][1] & A[2][2]);
495e1051a39Sopenharmony_ci    A[3][0] ^= (~A[3][1] & A[3][2]);
496e1051a39Sopenharmony_ci    A[2][1] ^= (~A[2][2] & A[2][3]);
497e1051a39Sopenharmony_ci    A[3][1] ^= (~A[3][2] & A[3][3]);
498e1051a39Sopenharmony_ci    A[2][2] ^= (~A[2][3] & A[2][4]);
499e1051a39Sopenharmony_ci    A[3][2] ^= (~A[3][3] & A[3][4]);
500e1051a39Sopenharmony_ci    A[2][3] ^= (~A[2][4] & C[2]);
501e1051a39Sopenharmony_ci    A[3][3] ^= (~A[3][4] & C[3]);
502e1051a39Sopenharmony_ci    A[2][4] ^= (~C[2]    & D[2]);
503e1051a39Sopenharmony_ci    A[3][4] ^= (~C[3]    & D[3]);
504e1051a39Sopenharmony_ci
505e1051a39Sopenharmony_ci    C[4] = A[4][0];
506e1051a39Sopenharmony_ci    D[4] = A[4][1];
507e1051a39Sopenharmony_ci
508e1051a39Sopenharmony_ci    A[4][0] ^= (~A[4][1] & A[4][2]);
509e1051a39Sopenharmony_ci    A[4][1] ^= (~A[4][2] & A[4][3]);
510e1051a39Sopenharmony_ci    A[4][2] ^= (~A[4][3] & A[4][4]);
511e1051a39Sopenharmony_ci    A[4][3] ^= (~A[4][4] & C[4]);
512e1051a39Sopenharmony_ci    A[4][4] ^= (~C[4]    & D[4]);
513e1051a39Sopenharmony_ci    A[0][0] ^= iotas[i];
514e1051a39Sopenharmony_ci}
515e1051a39Sopenharmony_ci
/*
 * KECCAK_1X_ALT permutation: apply Round() to |A| in place for round
 * numbers 0 through 23.
 */
static void KeccakF1600(uint64_t A[5][5])
{
    size_t i;

    for (i = 0; i < 24; i++) {
        Round(A, i);
    }
}

#elif defined(KECCAK_2X)
/*
 * This implementation is a variant of KECCAK_1X above with the outer-most
 * round loop unrolled twice. This allows taking temporary storage
 * out of the round procedure and simplifying references to it by alternating
 * it with actual data (see round loop below). Originally it was meant
 * rather as a reference for an assembly implementation, but it seems to
 * play best with compilers [as well as provide the best instruction per
 * processed byte ratio at minimal round unroll factor]...
 */
535e1051a39Sopenharmony_cistatic void Round(uint64_t R[5][5], uint64_t A[5][5], size_t i)
536e1051a39Sopenharmony_ci{
537e1051a39Sopenharmony_ci    uint64_t C[5], D[5];
538e1051a39Sopenharmony_ci
539e1051a39Sopenharmony_ci    assert(i < (sizeof(iotas) / sizeof(iotas[0])));
540e1051a39Sopenharmony_ci
541e1051a39Sopenharmony_ci    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
542e1051a39Sopenharmony_ci    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
543e1051a39Sopenharmony_ci    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
544e1051a39Sopenharmony_ci    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
545e1051a39Sopenharmony_ci    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
546e1051a39Sopenharmony_ci
547e1051a39Sopenharmony_ci    D[0] = ROL64(C[1], 1) ^ C[4];
548e1051a39Sopenharmony_ci    D[1] = ROL64(C[2], 1) ^ C[0];
549e1051a39Sopenharmony_ci    D[2] = ROL64(C[3], 1) ^ C[1];
550e1051a39Sopenharmony_ci    D[3] = ROL64(C[4], 1) ^ C[2];
551e1051a39Sopenharmony_ci    D[4] = ROL64(C[0], 1) ^ C[3];
552e1051a39Sopenharmony_ci
553e1051a39Sopenharmony_ci    C[0] =       A[0][0] ^ D[0]; /* rotate by 0 */
554e1051a39Sopenharmony_ci    C[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
555e1051a39Sopenharmony_ci    C[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
556e1051a39Sopenharmony_ci    C[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
557e1051a39Sopenharmony_ci    C[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
558e1051a39Sopenharmony_ci
559e1051a39Sopenharmony_ci#ifdef KECCAK_COMPLEMENTING_TRANSFORM
560e1051a39Sopenharmony_ci    R[0][0] = C[0] ^ ( C[1] | C[2]) ^ iotas[i];
561e1051a39Sopenharmony_ci    R[0][1] = C[1] ^ (~C[2] | C[3]);
562e1051a39Sopenharmony_ci    R[0][2] = C[2] ^ ( C[3] & C[4]);
563e1051a39Sopenharmony_ci    R[0][3] = C[3] ^ ( C[4] | C[0]);
564e1051a39Sopenharmony_ci    R[0][4] = C[4] ^ ( C[0] & C[1]);
565e1051a39Sopenharmony_ci#else
566e1051a39Sopenharmony_ci    R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
567e1051a39Sopenharmony_ci    R[0][1] = C[1] ^ (~C[2] & C[3]);
568e1051a39Sopenharmony_ci    R[0][2] = C[2] ^ (~C[3] & C[4]);
569e1051a39Sopenharmony_ci    R[0][3] = C[3] ^ (~C[4] & C[0]);
570e1051a39Sopenharmony_ci    R[0][4] = C[4] ^ (~C[0] & C[1]);
571e1051a39Sopenharmony_ci#endif
572e1051a39Sopenharmony_ci
573e1051a39Sopenharmony_ci    C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
574e1051a39Sopenharmony_ci    C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
575e1051a39Sopenharmony_ci    C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
576e1051a39Sopenharmony_ci    C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
577e1051a39Sopenharmony_ci    C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
578e1051a39Sopenharmony_ci
579e1051a39Sopenharmony_ci#ifdef KECCAK_COMPLEMENTING_TRANSFORM
580e1051a39Sopenharmony_ci    R[1][0] = C[0] ^ (C[1] |  C[2]);
581e1051a39Sopenharmony_ci    R[1][1] = C[1] ^ (C[2] &  C[3]);
582e1051a39Sopenharmony_ci    R[1][2] = C[2] ^ (C[3] | ~C[4]);
583e1051a39Sopenharmony_ci    R[1][3] = C[3] ^ (C[4] |  C[0]);
584e1051a39Sopenharmony_ci    R[1][4] = C[4] ^ (C[0] &  C[1]);
585e1051a39Sopenharmony_ci#else
586e1051a39Sopenharmony_ci    R[1][0] = C[0] ^ (~C[1] & C[2]);
587e1051a39Sopenharmony_ci    R[1][1] = C[1] ^ (~C[2] & C[3]);
588e1051a39Sopenharmony_ci    R[1][2] = C[2] ^ (~C[3] & C[4]);
589e1051a39Sopenharmony_ci    R[1][3] = C[3] ^ (~C[4] & C[0]);
590e1051a39Sopenharmony_ci    R[1][4] = C[4] ^ (~C[0] & C[1]);
591e1051a39Sopenharmony_ci#endif
592e1051a39Sopenharmony_ci
593e1051a39Sopenharmony_ci    C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
594e1051a39Sopenharmony_ci    C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
595e1051a39Sopenharmony_ci    C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
596e1051a39Sopenharmony_ci    C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
597e1051a39Sopenharmony_ci    C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
598e1051a39Sopenharmony_ci
599e1051a39Sopenharmony_ci#ifdef KECCAK_COMPLEMENTING_TRANSFORM
600e1051a39Sopenharmony_ci    R[2][0] =  C[0] ^ ( C[1] | C[2]);
601e1051a39Sopenharmony_ci    R[2][1] =  C[1] ^ ( C[2] & C[3]);
602e1051a39Sopenharmony_ci    R[2][2] =  C[2] ^ (~C[3] & C[4]);
603e1051a39Sopenharmony_ci    R[2][3] = ~C[3] ^ ( C[4] | C[0]);
604e1051a39Sopenharmony_ci    R[2][4] =  C[4] ^ ( C[0] & C[1]);
605e1051a39Sopenharmony_ci#else
606e1051a39Sopenharmony_ci    R[2][0] = C[0] ^ (~C[1] & C[2]);
607e1051a39Sopenharmony_ci    R[2][1] = C[1] ^ (~C[2] & C[3]);
608e1051a39Sopenharmony_ci    R[2][2] = C[2] ^ (~C[3] & C[4]);
609e1051a39Sopenharmony_ci    R[2][3] = C[3] ^ (~C[4] & C[0]);
610e1051a39Sopenharmony_ci    R[2][4] = C[4] ^ (~C[0] & C[1]);
611e1051a39Sopenharmony_ci#endif
612e1051a39Sopenharmony_ci
613e1051a39Sopenharmony_ci    C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
614e1051a39Sopenharmony_ci    C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
615e1051a39Sopenharmony_ci    C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
616e1051a39Sopenharmony_ci    C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
617e1051a39Sopenharmony_ci    C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
618e1051a39Sopenharmony_ci
619e1051a39Sopenharmony_ci#ifdef KECCAK_COMPLEMENTING_TRANSFORM
620e1051a39Sopenharmony_ci    R[3][0] =  C[0] ^ ( C[1] & C[2]);
621e1051a39Sopenharmony_ci    R[3][1] =  C[1] ^ ( C[2] | C[3]);
622e1051a39Sopenharmony_ci    R[3][2] =  C[2] ^ (~C[3] | C[4]);
623e1051a39Sopenharmony_ci    R[3][3] = ~C[3] ^ ( C[4] & C[0]);
624e1051a39Sopenharmony_ci    R[3][4] =  C[4] ^ ( C[0] | C[1]);
625e1051a39Sopenharmony_ci#else
626e1051a39Sopenharmony_ci    R[3][0] = C[0] ^ (~C[1] & C[2]);
627e1051a39Sopenharmony_ci    R[3][1] = C[1] ^ (~C[2] & C[3]);
628e1051a39Sopenharmony_ci    R[3][2] = C[2] ^ (~C[3] & C[4]);
629e1051a39Sopenharmony_ci    R[3][3] = C[3] ^ (~C[4] & C[0]);
630e1051a39Sopenharmony_ci    R[3][4] = C[4] ^ (~C[0] & C[1]);
631e1051a39Sopenharmony_ci#endif
632e1051a39Sopenharmony_ci
633e1051a39Sopenharmony_ci    C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
634e1051a39Sopenharmony_ci    C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
635e1051a39Sopenharmony_ci    C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
636e1051a39Sopenharmony_ci    C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
637e1051a39Sopenharmony_ci    C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
638e1051a39Sopenharmony_ci
639e1051a39Sopenharmony_ci#ifdef KECCAK_COMPLEMENTING_TRANSFORM
640e1051a39Sopenharmony_ci    R[4][0] =  C[0] ^ (~C[1] & C[2]);
641e1051a39Sopenharmony_ci    R[4][1] = ~C[1] ^ ( C[2] | C[3]);
642e1051a39Sopenharmony_ci    R[4][2] =  C[2] ^ ( C[3] & C[4]);
643e1051a39Sopenharmony_ci    R[4][3] =  C[3] ^ ( C[4] | C[0]);
644e1051a39Sopenharmony_ci    R[4][4] =  C[4] ^ ( C[0] & C[1]);
645e1051a39Sopenharmony_ci#else
646e1051a39Sopenharmony_ci    R[4][0] = C[0] ^ (~C[1] & C[2]);
647e1051a39Sopenharmony_ci    R[4][1] = C[1] ^ (~C[2] & C[3]);
648e1051a39Sopenharmony_ci    R[4][2] = C[2] ^ (~C[3] & C[4]);
649e1051a39Sopenharmony_ci    R[4][3] = C[3] ^ (~C[4] & C[0]);
650e1051a39Sopenharmony_ci    R[4][4] = C[4] ^ (~C[0] & C[1]);
651e1051a39Sopenharmony_ci#endif
652e1051a39Sopenharmony_ci}
653e1051a39Sopenharmony_ci
/*
 * Run all 24 rounds over the state |A|. Rounds are issued in pairs:
 * the even round writes into a local scratch state and the odd round
 * writes back into |A|, so the final result is left in |A|.
 */
static void KeccakF1600(uint64_t A[5][5])
{
    uint64_t scratch[5][5];
    size_t rnd = 0;

#ifdef KECCAK_COMPLEMENTING_TRANSFORM
    /* Complement the six lanes that Round() expects in inverted form. */
    A[0][1] = ~A[0][1];
    A[0][2] = ~A[0][2];
    A[1][3] = ~A[1][3];
    A[2][2] = ~A[2][2];
    A[3][2] = ~A[3][2];
    A[4][0] = ~A[4][0];
#endif

    while (rnd < 24) {
        Round(scratch, A, rnd);         /* A -> scratch */
        Round(A, scratch, rnd + 1);     /* scratch -> A */
        rnd += 2;
    }

#ifdef KECCAK_COMPLEMENTING_TRANSFORM
    /* Undo the complement applied on entry to the same six lanes. */
    A[0][1] = ~A[0][1];
    A[0][2] = ~A[0][2];
    A[1][3] = ~A[1][3];
    A[2][2] = ~A[2][2];
    A[3][2] = ~A[3][2];
    A[4][0] = ~A[4][0];
#endif
}
682e1051a39Sopenharmony_ci
683e1051a39Sopenharmony_ci#else   /* define KECCAK_INPLACE to compile this code path */
684e1051a39Sopenharmony_ci/*
685e1051a39Sopenharmony_ci * This implementation is KECCAK_1X from above combined 4 times with
686e1051a39Sopenharmony_ci * a twist that allows to omit temporary storage and perform in-place
687e1051a39Sopenharmony_ci * processing. It's discussed in section 2.5 of "Keccak implementation
688e1051a39Sopenharmony_ci * overview". It's likely to be best suited for processors with large
689e1051a39Sopenharmony_ci * register bank... On the other hand processor with large register
690e1051a39Sopenharmony_ci * bank can as well use KECCAK_1X_ALT, it would be as fast but much
691e1051a39Sopenharmony_ci * more compact...
692e1051a39Sopenharmony_ci */
static void FourRounds(uint64_t A[5][5], size_t i)
{
    /*
     * Applies four consecutive rounds to |A| in place, consuming round
     * constants iotas[i] through iotas[i + 3]. No second state buffer is
     * used: each group of five lanes is loaded into B[] and the five
     * results are stored back into exactly the five slots just read, so
     * within a round the order of loads and stores must not be changed.
     * Which slot of |A| holds which lane differs from round to round; the
     * fourth round stores row by row into A[0][*] .. A[4][*].
     *
     * B[] - the five rotated lanes feeding one group of outputs
     * C[] - column parities (XOR of the five lanes of a column)
     * D[] - per-column value derived from C[] and XORed into every lane
     */
    uint64_t B[5], C[5], D[5];

    /* i + 3 has to be a valid index into iotas[] */
    assert(i <= (sizeof(iotas) / sizeof(iotas[0]) - 4));

    /* Round 4*n */
    /* Only this round computes the column parities directly from |A|. */
    C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
    C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
    C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
    C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
    C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];

    D[0] = ROL64(C[1], 1) ^ C[4];
    D[1] = ROL64(C[2], 1) ^ C[0];
    D[2] = ROL64(C[3], 1) ^ C[1];
    D[3] = ROL64(C[4], 1) ^ C[2];
    D[4] = ROL64(C[0], 1) ^ C[3];

    B[0] =       A[0][0] ^ D[0]; /* rotate by 0 */
    B[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
    B[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
    B[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
    B[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);

    /*
     * Store the new lanes and, in the same statement, start accumulating
     * the column parities of the new state in C[] for the next round.
     */
    C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i];
    C[1] = A[1][1] = B[1] ^ (~B[2] & B[3]);
    C[2] = A[2][2] = B[2] ^ (~B[3] & B[4]);
    C[3] = A[3][3] = B[3] ^ (~B[4] & B[0]);
    C[4] = A[4][4] = B[4] ^ (~B[0] & B[1]);

    B[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
    B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
    B[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
    B[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
    B[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);

    C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
    C[1] ^= A[3][1] = B[1] ^ (~B[2] & B[3]);
    C[2] ^= A[4][2] = B[2] ^ (~B[3] & B[4]);
    C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
    C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);

    B[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
    B[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
    B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
    B[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
    B[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);

    C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
    C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
    C[2] ^= A[1][2] = B[2] ^ (~B[3] & B[4]);
    C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
    C[4] ^= A[3][4] = B[4] ^ (~B[0] & B[1]);

    B[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
    B[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
    B[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
    B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
    B[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);

    C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
    C[1] ^= A[2][1] = B[1] ^ (~B[2] & B[3]);
    C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
    C[3] ^= A[4][3] = B[3] ^ (~B[4] & B[0]);
    C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);

    B[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
    B[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
    B[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
    B[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
    B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);

    C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
    C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
    C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
    C[3] ^= A[1][3] = B[3] ^ (~B[4] & B[0]);
    C[4] ^= A[2][4] = B[4] ^ (~B[0] & B[1]);

    /* Round 4*n+1 */
    /* C[] already holds the column parities accumulated by the stores above. */
    D[0] = ROL64(C[1], 1) ^ C[4];
    D[1] = ROL64(C[2], 1) ^ C[0];
    D[2] = ROL64(C[3], 1) ^ C[1];
    D[3] = ROL64(C[4], 1) ^ C[2];
    D[4] = ROL64(C[0], 1) ^ C[3];

    /*
     * Same rotation amounts as in the previous round, but the lanes are
     * fetched from the slots where the previous round left them.
     */
    B[0] =       A[0][0] ^ D[0]; /* rotate by 0 */
    B[1] = ROL64(A[3][1] ^ D[1], rhotates[1][1]);
    B[2] = ROL64(A[1][2] ^ D[2], rhotates[2][2]);
    B[3] = ROL64(A[4][3] ^ D[3], rhotates[3][3]);
    B[4] = ROL64(A[2][4] ^ D[4], rhotates[4][4]);

    C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 1];
    C[1] = A[3][1] = B[1] ^ (~B[2] & B[3]);
    C[2] = A[1][2] = B[2] ^ (~B[3] & B[4]);
    C[3] = A[4][3] = B[3] ^ (~B[4] & B[0]);
    C[4] = A[2][4] = B[4] ^ (~B[0] & B[1]);

    B[0] = ROL64(A[3][3] ^ D[3], rhotates[0][3]);
    B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
    B[2] = ROL64(A[4][0] ^ D[0], rhotates[2][0]);
    B[3] = ROL64(A[2][1] ^ D[1], rhotates[3][1]);
    B[4] = ROL64(A[0][2] ^ D[2], rhotates[4][2]);

    C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
    C[1] ^= A[2][1] = B[1] ^ (~B[2] & B[3]);
    C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
    C[3] ^= A[3][3] = B[3] ^ (~B[4] & B[0]);
    C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);

    B[0] = ROL64(A[1][1] ^ D[1], rhotates[0][1]);
    B[1] = ROL64(A[4][2] ^ D[2], rhotates[1][2]);
    B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
    B[3] = ROL64(A[0][4] ^ D[4], rhotates[3][4]);
    B[4] = ROL64(A[3][0] ^ D[0], rhotates[4][0]);

    C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
    C[1] ^= A[1][1] = B[1] ^ (~B[2] & B[3]);
    C[2] ^= A[4][2] = B[2] ^ (~B[3] & B[4]);
    C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
    C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);

    B[0] = ROL64(A[4][4] ^ D[4], rhotates[0][4]);
    B[1] = ROL64(A[2][0] ^ D[0], rhotates[1][0]);
    B[2] = ROL64(A[0][1] ^ D[1], rhotates[2][1]);
    B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
    B[4] = ROL64(A[1][3] ^ D[3], rhotates[4][3]);

    C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
    C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
    C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
    C[3] ^= A[1][3] = B[3] ^ (~B[4] & B[0]);
    C[4] ^= A[4][4] = B[4] ^ (~B[0] & B[1]);

    B[0] = ROL64(A[2][2] ^ D[2], rhotates[0][2]);
    B[1] = ROL64(A[0][3] ^ D[3], rhotates[1][3]);
    B[2] = ROL64(A[3][4] ^ D[4], rhotates[2][4]);
    B[3] = ROL64(A[1][0] ^ D[0], rhotates[3][0]);
    B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);

    C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
    C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
    C[2] ^= A[2][2] = B[2] ^ (~B[3] & B[4]);
    C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
    C[4] ^= A[3][4] = B[4] ^ (~B[0] & B[1]);

    /* Round 4*n+2 */
    /* Again driven by the parities accumulated during the previous round. */
    D[0] = ROL64(C[1], 1) ^ C[4];
    D[1] = ROL64(C[2], 1) ^ C[0];
    D[2] = ROL64(C[3], 1) ^ C[1];
    D[3] = ROL64(C[4], 1) ^ C[2];
    D[4] = ROL64(C[0], 1) ^ C[3];

    B[0] =       A[0][0] ^ D[0]; /* rotate by 0 */
    B[1] = ROL64(A[2][1] ^ D[1], rhotates[1][1]);
    B[2] = ROL64(A[4][2] ^ D[2], rhotates[2][2]);
    B[3] = ROL64(A[1][3] ^ D[3], rhotates[3][3]);
    B[4] = ROL64(A[3][4] ^ D[4], rhotates[4][4]);

    C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 2];
    C[1] = A[2][1] = B[1] ^ (~B[2] & B[3]);
    C[2] = A[4][2] = B[2] ^ (~B[3] & B[4]);
    C[3] = A[1][3] = B[3] ^ (~B[4] & B[0]);
    C[4] = A[3][4] = B[4] ^ (~B[0] & B[1]);

    B[0] = ROL64(A[4][3] ^ D[3], rhotates[0][3]);
    B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
    B[2] = ROL64(A[3][0] ^ D[0], rhotates[2][0]);
    B[3] = ROL64(A[0][1] ^ D[1], rhotates[3][1]);
    B[4] = ROL64(A[2][2] ^ D[2], rhotates[4][2]);

    C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
    C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
    C[2] ^= A[2][2] = B[2] ^ (~B[3] & B[4]);
    C[3] ^= A[4][3] = B[3] ^ (~B[4] & B[0]);
    C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);

    B[0] = ROL64(A[3][1] ^ D[1], rhotates[0][1]);
    B[1] = ROL64(A[0][2] ^ D[2], rhotates[1][2]);
    B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
    B[3] = ROL64(A[4][4] ^ D[4], rhotates[3][4]);
    B[4] = ROL64(A[1][0] ^ D[0], rhotates[4][0]);

    C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
    C[1] ^= A[3][1] = B[1] ^ (~B[2] & B[3]);
    C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
    C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
    C[4] ^= A[4][4] = B[4] ^ (~B[0] & B[1]);

    B[0] = ROL64(A[2][4] ^ D[4], rhotates[0][4]);
    B[1] = ROL64(A[4][0] ^ D[0], rhotates[1][0]);
    B[2] = ROL64(A[1][1] ^ D[1], rhotates[2][1]);
    B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
    B[4] = ROL64(A[0][3] ^ D[3], rhotates[4][3]);

    C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
    C[1] ^= A[1][1] = B[1] ^ (~B[2] & B[3]);
    C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
    C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
    C[4] ^= A[2][4] = B[4] ^ (~B[0] & B[1]);

    B[0] = ROL64(A[1][2] ^ D[2], rhotates[0][2]);
    B[1] = ROL64(A[3][3] ^ D[3], rhotates[1][3]);
    B[2] = ROL64(A[0][4] ^ D[4], rhotates[2][4]);
    B[3] = ROL64(A[2][0] ^ D[0], rhotates[3][0]);
    B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);

    C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
    C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
    C[2] ^= A[1][2] = B[2] ^ (~B[3] & B[4]);
    C[3] ^= A[3][3] = B[3] ^ (~B[4] & B[0]);
    C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);

    /* Round 4*n+3 */
    D[0] = ROL64(C[1], 1) ^ C[4];
    D[1] = ROL64(C[2], 1) ^ C[0];
    D[2] = ROL64(C[3], 1) ^ C[1];
    D[3] = ROL64(C[4], 1) ^ C[2];
    D[4] = ROL64(C[0], 1) ^ C[3];

    /* In this round each group reads and writes a single row of |A|. */
    B[0] =       A[0][0] ^ D[0]; /* rotate by 0 */
    B[1] = ROL64(A[0][1] ^ D[1], rhotates[1][1]);
    B[2] = ROL64(A[0][2] ^ D[2], rhotates[2][2]);
    B[3] = ROL64(A[0][3] ^ D[3], rhotates[3][3]);
    B[4] = ROL64(A[0][4] ^ D[4], rhotates[4][4]);

    /*
     * The parity accumulation is left commented out below: nothing in
     * this function reads C[] after this point, and the next call
     * recomputes the parities from |A| at its start.
     */
    /* C[0] = */ A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 3];
    /* C[1] = */ A[0][1] = B[1] ^ (~B[2] & B[3]);
    /* C[2] = */ A[0][2] = B[2] ^ (~B[3] & B[4]);
    /* C[3] = */ A[0][3] = B[3] ^ (~B[4] & B[0]);
    /* C[4] = */ A[0][4] = B[4] ^ (~B[0] & B[1]);

    B[0] = ROL64(A[1][3] ^ D[3], rhotates[0][3]);
    B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
    B[2] = ROL64(A[1][0] ^ D[0], rhotates[2][0]);
    B[3] = ROL64(A[1][1] ^ D[1], rhotates[3][1]);
    B[4] = ROL64(A[1][2] ^ D[2], rhotates[4][2]);

    /* C[0] ^= */ A[1][0] = B[0] ^ (~B[1] & B[2]);
    /* C[1] ^= */ A[1][1] = B[1] ^ (~B[2] & B[3]);
    /* C[2] ^= */ A[1][2] = B[2] ^ (~B[3] & B[4]);
    /* C[3] ^= */ A[1][3] = B[3] ^ (~B[4] & B[0]);
    /* C[4] ^= */ A[1][4] = B[4] ^ (~B[0] & B[1]);

    B[0] = ROL64(A[2][1] ^ D[1], rhotates[0][1]);
    B[1] = ROL64(A[2][2] ^ D[2], rhotates[1][2]);
    B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
    B[3] = ROL64(A[2][4] ^ D[4], rhotates[3][4]);
    B[4] = ROL64(A[2][0] ^ D[0], rhotates[4][0]);

    /* C[0] ^= */ A[2][0] = B[0] ^ (~B[1] & B[2]);
    /* C[1] ^= */ A[2][1] = B[1] ^ (~B[2] & B[3]);
    /* C[2] ^= */ A[2][2] = B[2] ^ (~B[3] & B[4]);
    /* C[3] ^= */ A[2][3] = B[3] ^ (~B[4] & B[0]);
    /* C[4] ^= */ A[2][4] = B[4] ^ (~B[0] & B[1]);

    B[0] = ROL64(A[3][4] ^ D[4], rhotates[0][4]);
    B[1] = ROL64(A[3][0] ^ D[0], rhotates[1][0]);
    B[2] = ROL64(A[3][1] ^ D[1], rhotates[2][1]);
    B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
    B[4] = ROL64(A[3][3] ^ D[3], rhotates[4][3]);

    /* C[0] ^= */ A[3][0] = B[0] ^ (~B[1] & B[2]);
    /* C[1] ^= */ A[3][1] = B[1] ^ (~B[2] & B[3]);
    /* C[2] ^= */ A[3][2] = B[2] ^ (~B[3] & B[4]);
    /* C[3] ^= */ A[3][3] = B[3] ^ (~B[4] & B[0]);
    /* C[4] ^= */ A[3][4] = B[4] ^ (~B[0] & B[1]);

    B[0] = ROL64(A[4][2] ^ D[2], rhotates[0][2]);
    B[1] = ROL64(A[4][3] ^ D[3], rhotates[1][3]);
    B[2] = ROL64(A[4][4] ^ D[4], rhotates[2][4]);
    B[3] = ROL64(A[4][0] ^ D[0], rhotates[3][0]);
    B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);

    /* C[0] ^= */ A[4][0] = B[0] ^ (~B[1] & B[2]);
    /* C[1] ^= */ A[4][1] = B[1] ^ (~B[2] & B[3]);
    /* C[2] ^= */ A[4][2] = B[2] ^ (~B[3] & B[4]);
    /* C[3] ^= */ A[4][3] = B[3] ^ (~B[4] & B[0]);
    /* C[4] ^= */ A[4][4] = B[4] ^ (~B[0] & B[1]);
}
973e1051a39Sopenharmony_ci
/*
 * Run all 24 rounds over |A| in place, four rounds per FourRounds()
 * call; the second argument is the index of the first round of each
 * group (0, 4, 8, ..., 20).
 */
static void KeccakF1600(uint64_t A[5][5])
{
    size_t first_round = 0;

    while (first_round < 24) {
        FourRounds(A, first_round);
        first_round += 4;
    }
}
982e1051a39Sopenharmony_ci
983e1051a39Sopenharmony_ci#endif
984e1051a39Sopenharmony_ci
985e1051a39Sopenharmony_cistatic uint64_t BitInterleave(uint64_t Ai)
986e1051a39Sopenharmony_ci{
987e1051a39Sopenharmony_ci    if (BIT_INTERLEAVE) {
988e1051a39Sopenharmony_ci        uint32_t hi = (uint32_t)(Ai >> 32), lo = (uint32_t)Ai;
989e1051a39Sopenharmony_ci        uint32_t t0, t1;
990e1051a39Sopenharmony_ci
991e1051a39Sopenharmony_ci        t0 = lo & 0x55555555;
992e1051a39Sopenharmony_ci        t0 |= t0 >> 1;  t0 &= 0x33333333;
993e1051a39Sopenharmony_ci        t0 |= t0 >> 2;  t0 &= 0x0f0f0f0f;
994e1051a39Sopenharmony_ci        t0 |= t0 >> 4;  t0 &= 0x00ff00ff;
995e1051a39Sopenharmony_ci        t0 |= t0 >> 8;  t0 &= 0x0000ffff;
996e1051a39Sopenharmony_ci
997e1051a39Sopenharmony_ci        t1 = hi & 0x55555555;
998e1051a39Sopenharmony_ci        t1 |= t1 >> 1;  t1 &= 0x33333333;
999e1051a39Sopenharmony_ci        t1 |= t1 >> 2;  t1 &= 0x0f0f0f0f;
1000e1051a39Sopenharmony_ci        t1 |= t1 >> 4;  t1 &= 0x00ff00ff;
1001e1051a39Sopenharmony_ci        t1 |= t1 >> 8;  t1 <<= 16;
1002e1051a39Sopenharmony_ci
1003e1051a39Sopenharmony_ci        lo &= 0xaaaaaaaa;
1004e1051a39Sopenharmony_ci        lo |= lo << 1;  lo &= 0xcccccccc;
1005e1051a39Sopenharmony_ci        lo |= lo << 2;  lo &= 0xf0f0f0f0;
1006e1051a39Sopenharmony_ci        lo |= lo << 4;  lo &= 0xff00ff00;
1007e1051a39Sopenharmony_ci        lo |= lo << 8;  lo >>= 16;
1008e1051a39Sopenharmony_ci
1009e1051a39Sopenharmony_ci        hi &= 0xaaaaaaaa;
1010e1051a39Sopenharmony_ci        hi |= hi << 1;  hi &= 0xcccccccc;
1011e1051a39Sopenharmony_ci        hi |= hi << 2;  hi &= 0xf0f0f0f0;
1012e1051a39Sopenharmony_ci        hi |= hi << 4;  hi &= 0xff00ff00;
1013e1051a39Sopenharmony_ci        hi |= hi << 8;  hi &= 0xffff0000;
1014e1051a39Sopenharmony_ci
1015e1051a39Sopenharmony_ci        Ai = ((uint64_t)(hi | lo) << 32) | (t1 | t0);
1016e1051a39Sopenharmony_ci    }
1017e1051a39Sopenharmony_ci
1018e1051a39Sopenharmony_ci    return Ai;
1019e1051a39Sopenharmony_ci}
1020e1051a39Sopenharmony_ci
1021e1051a39Sopenharmony_cistatic uint64_t BitDeinterleave(uint64_t Ai)
1022e1051a39Sopenharmony_ci{
1023e1051a39Sopenharmony_ci    if (BIT_INTERLEAVE) {
1024e1051a39Sopenharmony_ci        uint32_t hi = (uint32_t)(Ai >> 32), lo = (uint32_t)Ai;
1025e1051a39Sopenharmony_ci        uint32_t t0, t1;
1026e1051a39Sopenharmony_ci
1027e1051a39Sopenharmony_ci        t0 = lo & 0x0000ffff;
1028e1051a39Sopenharmony_ci        t0 |= t0 << 8;  t0 &= 0x00ff00ff;
1029e1051a39Sopenharmony_ci        t0 |= t0 << 4;  t0 &= 0x0f0f0f0f;
1030e1051a39Sopenharmony_ci        t0 |= t0 << 2;  t0 &= 0x33333333;
1031e1051a39Sopenharmony_ci        t0 |= t0 << 1;  t0 &= 0x55555555;
1032e1051a39Sopenharmony_ci
1033e1051a39Sopenharmony_ci        t1 = hi << 16;
1034e1051a39Sopenharmony_ci        t1 |= t1 >> 8;  t1 &= 0xff00ff00;
1035e1051a39Sopenharmony_ci        t1 |= t1 >> 4;  t1 &= 0xf0f0f0f0;
1036e1051a39Sopenharmony_ci        t1 |= t1 >> 2;  t1 &= 0xcccccccc;
1037e1051a39Sopenharmony_ci        t1 |= t1 >> 1;  t1 &= 0xaaaaaaaa;
1038e1051a39Sopenharmony_ci
1039e1051a39Sopenharmony_ci        lo >>= 16;
1040e1051a39Sopenharmony_ci        lo |= lo << 8;  lo &= 0x00ff00ff;
1041e1051a39Sopenharmony_ci        lo |= lo << 4;  lo &= 0x0f0f0f0f;
1042e1051a39Sopenharmony_ci        lo |= lo << 2;  lo &= 0x33333333;
1043e1051a39Sopenharmony_ci        lo |= lo << 1;  lo &= 0x55555555;
1044e1051a39Sopenharmony_ci
1045e1051a39Sopenharmony_ci        hi &= 0xffff0000;
1046e1051a39Sopenharmony_ci        hi |= hi >> 8;  hi &= 0xff00ff00;
1047e1051a39Sopenharmony_ci        hi |= hi >> 4;  hi &= 0xf0f0f0f0;
1048e1051a39Sopenharmony_ci        hi |= hi >> 2;  hi &= 0xcccccccc;
1049e1051a39Sopenharmony_ci        hi |= hi >> 1;  hi &= 0xaaaaaaaa;
1050e1051a39Sopenharmony_ci
1051e1051a39Sopenharmony_ci        Ai = ((uint64_t)(hi | lo) << 32) | (t1 | t0);
1052e1051a39Sopenharmony_ci    }
1053e1051a39Sopenharmony_ci
1054e1051a39Sopenharmony_ci    return Ai;
1055e1051a39Sopenharmony_ci}
1056e1051a39Sopenharmony_ci
1057e1051a39Sopenharmony_ci/*
1058e1051a39Sopenharmony_ci * SHA3_absorb can be called multiple times, but at each invocation
1059e1051a39Sopenharmony_ci * largest multiple of |r| out of |len| bytes are processed. Then
1060e1051a39Sopenharmony_ci * remaining amount of bytes is returned. This is done to spare caller
1061e1051a39Sopenharmony_ci * trouble of calculating the largest multiple of |r|. |r| can be viewed
1062e1051a39Sopenharmony_ci * as blocksize. It is commonly (1600 - 256*n)/8, e.g. 168, 136, 104,
1063e1051a39Sopenharmony_ci * 72, but can also be (1600 - 448)/8 = 144. All this means that message
1064e1051a39Sopenharmony_ci * padding and intermediate sub-block buffering, byte- or bitwise, is
1065e1051a39Sopenharmony_ci * caller's responsibility.
1066e1051a39Sopenharmony_ci */
size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len,
                   size_t r)
{
    uint64_t *state = (uint64_t *)A;    /* the 5x5 state viewed as 25 lanes */
    const size_t lanes = r / 8;         /* 64-bit lanes per |r|-byte block */
    size_t lane;

    assert(r < (25 * sizeof(A[0][0])) && (r % 8) == 0);

    /* Consume whole |r|-byte blocks; a shorter tail is left to the caller. */
    for (; len >= r; len -= r) {
        for (lane = 0; lane < lanes; lane++, inp += 8) {
            uint64_t word = 0;
            int b;

            /* assemble eight input bytes as a little-endian 64-bit value */
            for (b = 7; b >= 0; b--)
                word = (word << 8) | inp[b];

            state[lane] ^= BitInterleave(word);
        }
        KeccakF1600(A);
    }

    /* number of trailing input bytes that were not absorbed */
    return len;
}
1091e1051a39Sopenharmony_ci
/*
 * SHA3_squeeze is called once at the end to generate the |out| hash value
 * of |len| bytes.
 *
 * Output is produced lane by lane: each of the first |r|/8 lanes of |A| is
 * passed through BitDeinterleave() and stored least significant byte
 * first. The last lane may be emitted only partially when |len| is not a
 * multiple of 8. Whenever a whole |r|-byte block has been written and more
 * output is still required, KeccakF1600() is applied to |A| before
 * continuing; it is not applied after the final byte.
 */
void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r)
{
    uint64_t *lanes = (uint64_t *)A;
    const size_t lanes_per_block = r / 8;
    size_t lane, k;

    assert(r < (25 * sizeof(A[0][0])) && (r % 8) == 0);

    while (len != 0) {
        for (lane = 0; lane < lanes_per_block && len != 0; lane++) {
            uint64_t word = BitDeinterleave(lanes[lane]);
            /* A full lane yields 8 bytes; the final one may be shorter. */
            size_t take = len < 8 ? len : 8;

            for (k = 0; k < take; k++)
                out[k] = (unsigned char)(word >> (8 * k));

            out += take;
            len -= take;
        }

        /* Only permute again if the caller still wants more output. */
        if (len != 0)
            KeccakF1600(A);
    }
}
1130e1051a39Sopenharmony_ci#endif
1131e1051a39Sopenharmony_ci
1132e1051a39Sopenharmony_ci#ifdef SELFTEST
/*
 * One-shot helper for the self-test: starts from an all-zero state,
 * absorbs |len| bytes of |inp| at rate |r| and squeezes |d| bytes into
 * |out|. No padding is applied here, so |inp| must already be padded;
 * any bytes SHA3_absorb reports as left over are ignored.
 *
 * Post-padding one-shot implementations would look as following:
 *
 * SHA3_224     SHA3_sponge(inp, len, out, 224/8, (1600-448)/8);
 * SHA3_256     SHA3_sponge(inp, len, out, 256/8, (1600-512)/8);
 * SHA3_384     SHA3_sponge(inp, len, out, 384/8, (1600-768)/8);
 * SHA3_512     SHA3_sponge(inp, len, out, 512/8, (1600-1024)/8);
 * SHAKE_128    SHA3_sponge(inp, len, out, d, (1600-256)/8);
 * SHAKE_256    SHA3_sponge(inp, len, out, d, (1600-512)/8);
 */

void SHA3_sponge(const unsigned char *inp, size_t len,
                 unsigned char *out, size_t d, size_t r)
{
    uint64_t state[5][5] = { { 0 } };

    (void)SHA3_absorb(state, inp, len, r);
    SHA3_squeeze(state, out, d, r);
}
1153e1051a39Sopenharmony_ci
1154e1051a39Sopenharmony_ci# include <stdio.h>
1155e1051a39Sopenharmony_ci
/*
 * Self-test entry point: runs one pre-padded 168-byte block through
 * SHA3_sponge, prints the 512 output bytes to stdout as hex (16 per line)
 * and compares them with the expected value. Reports "success" or
 * "failure" on stderr and returns 0 or 1 accordingly.
 */
int main(void)
{
    /*
     * This is 5-bit SHAKE128 test from http://csrc.nist.gov/groups/ST/toolkit/examples.html#aHashing
     *
     * The input is padded by hand to exactly one 168-byte block (the first
     * two bytes and the last one are set explicitly, the rest are zero),
     * because SHA3_sponge does no padding of its own.
     */
    unsigned char test[168] = { '\xf3', '\x3' };
    unsigned char out[512];
    size_t i;
    static const unsigned char result[512] = {
        0x2E, 0x0A, 0xBF, 0xBA, 0x83, 0xE6, 0x72, 0x0B,
        0xFB, 0xC2, 0x25, 0xFF, 0x6B, 0x7A, 0xB9, 0xFF,
        0xCE, 0x58, 0xBA, 0x02, 0x7E, 0xE3, 0xD8, 0x98,
        0x76, 0x4F, 0xEF, 0x28, 0x7D, 0xDE, 0xCC, 0xCA,
        0x3E, 0x6E, 0x59, 0x98, 0x41, 0x1E, 0x7D, 0xDB,
        0x32, 0xF6, 0x75, 0x38, 0xF5, 0x00, 0xB1, 0x8C,
        0x8C, 0x97, 0xC4, 0x52, 0xC3, 0x70, 0xEA, 0x2C,
        0xF0, 0xAF, 0xCA, 0x3E, 0x05, 0xDE, 0x7E, 0x4D,
        0xE2, 0x7F, 0xA4, 0x41, 0xA9, 0xCB, 0x34, 0xFD,
        0x17, 0xC9, 0x78, 0xB4, 0x2D, 0x5B, 0x7E, 0x7F,
        0x9A, 0xB1, 0x8F, 0xFE, 0xFF, 0xC3, 0xC5, 0xAC,
        0x2F, 0x3A, 0x45, 0x5E, 0xEB, 0xFD, 0xC7, 0x6C,
        0xEA, 0xEB, 0x0A, 0x2C, 0xCA, 0x22, 0xEE, 0xF6,
        0xE6, 0x37, 0xF4, 0xCA, 0xBE, 0x5C, 0x51, 0xDE,
        0xD2, 0xE3, 0xFA, 0xD8, 0xB9, 0x52, 0x70, 0xA3,
        0x21, 0x84, 0x56, 0x64, 0xF1, 0x07, 0xD1, 0x64,
        0x96, 0xBB, 0x7A, 0xBF, 0xBE, 0x75, 0x04, 0xB6,
        0xED, 0xE2, 0xE8, 0x9E, 0x4B, 0x99, 0x6F, 0xB5,
        0x8E, 0xFD, 0xC4, 0x18, 0x1F, 0x91, 0x63, 0x38,
        0x1C, 0xBE, 0x7B, 0xC0, 0x06, 0xA7, 0xA2, 0x05,
        0x98, 0x9C, 0x52, 0x6C, 0xD1, 0xBD, 0x68, 0x98,
        0x36, 0x93, 0xB4, 0xBD, 0xC5, 0x37, 0x28, 0xB2,
        0x41, 0xC1, 0xCF, 0xF4, 0x2B, 0xB6, 0x11, 0x50,
        0x2C, 0x35, 0x20, 0x5C, 0xAB, 0xB2, 0x88, 0x75,
        0x56, 0x55, 0xD6, 0x20, 0xC6, 0x79, 0x94, 0xF0,
        0x64, 0x51, 0x18, 0x7F, 0x6F, 0xD1, 0x7E, 0x04,
        0x66, 0x82, 0xBA, 0x12, 0x86, 0x06, 0x3F, 0xF8,
        0x8F, 0xE2, 0x50, 0x8D, 0x1F, 0xCA, 0xF9, 0x03,
        0x5A, 0x12, 0x31, 0xAD, 0x41, 0x50, 0xA9, 0xC9,
        0xB2, 0x4C, 0x9B, 0x2D, 0x66, 0xB2, 0xAD, 0x1B,
        0xDE, 0x0B, 0xD0, 0xBB, 0xCB, 0x8B, 0xE0, 0x5B,
        0x83, 0x52, 0x29, 0xEF, 0x79, 0x19, 0x73, 0x73,
        0x23, 0x42, 0x44, 0x01, 0xE1, 0xD8, 0x37, 0xB6,
        0x6E, 0xB4, 0xE6, 0x30, 0xFF, 0x1D, 0xE7, 0x0C,
        0xB3, 0x17, 0xC2, 0xBA, 0xCB, 0x08, 0x00, 0x1D,
        0x34, 0x77, 0xB7, 0xA7, 0x0A, 0x57, 0x6D, 0x20,
        0x86, 0x90, 0x33, 0x58, 0x9D, 0x85, 0xA0, 0x1D,
        0xDB, 0x2B, 0x66, 0x46, 0xC0, 0x43, 0xB5, 0x9F,
        0xC0, 0x11, 0x31, 0x1D, 0xA6, 0x66, 0xFA, 0x5A,
        0xD1, 0xD6, 0x38, 0x7F, 0xA9, 0xBC, 0x40, 0x15,
        0xA3, 0x8A, 0x51, 0xD1, 0xDA, 0x1E, 0xA6, 0x1D,
        0x64, 0x8D, 0xC8, 0xE3, 0x9A, 0x88, 0xB9, 0xD6,
        0x22, 0xBD, 0xE2, 0x07, 0xFD, 0xAB, 0xC6, 0xF2,
        0x82, 0x7A, 0x88, 0x0C, 0x33, 0x0B, 0xBF, 0x6D,
        0xF7, 0x33, 0x77, 0x4B, 0x65, 0x3E, 0x57, 0x30,
        0x5D, 0x78, 0xDC, 0xE1, 0x12, 0xF1, 0x0A, 0x2C,
        0x71, 0xF4, 0xCD, 0xAD, 0x92, 0xED, 0x11, 0x3E,
        0x1C, 0xEA, 0x63, 0xB9, 0x19, 0x25, 0xED, 0x28,
        0x19, 0x1E, 0x6D, 0xBB, 0xB5, 0xAA, 0x5A, 0x2A,
        0xFD, 0xA5, 0x1F, 0xC0, 0x5A, 0x3A, 0xF5, 0x25,
        0x8B, 0x87, 0x66, 0x52, 0x43, 0x55, 0x0F, 0x28,
        0x94, 0x8A, 0xE2, 0xB8, 0xBE, 0xB6, 0xBC, 0x9C,
        0x77, 0x0B, 0x35, 0xF0, 0x67, 0xEA, 0xA6, 0x41,
        0xEF, 0xE6, 0x5B, 0x1A, 0x44, 0x90, 0x9D, 0x1B,
        0x14, 0x9F, 0x97, 0xEE, 0xA6, 0x01, 0x39, 0x1C,
        0x60, 0x9E, 0xC8, 0x1D, 0x19, 0x30, 0xF5, 0x7C,
        0x18, 0xA4, 0xE0, 0xFA, 0xB4, 0x91, 0xD1, 0xCA,
        0xDF, 0xD5, 0x04, 0x83, 0x44, 0x9E, 0xDC, 0x0F,
        0x07, 0xFF, 0xB2, 0x4D, 0x2C, 0x6F, 0x9A, 0x9A,
        0x3B, 0xFF, 0x39, 0xAE, 0x3D, 0x57, 0xF5, 0x60,
        0x65, 0x4D, 0x7D, 0x75, 0xC9, 0x08, 0xAB, 0xE6,
        0x25, 0x64, 0x75, 0x3E, 0xAC, 0x39, 0xD7, 0x50,
        0x3D, 0xA6, 0xD3, 0x7C, 0x2E, 0x32, 0xE1, 0xAF,
        0x3B, 0x8A, 0xEC, 0x8A, 0xE3, 0x06, 0x9C, 0xD9
    };

    test[167] = '\x80';
    /* One block in, 512 bytes out; the rate equals the input size (168). */
    SHA3_sponge(test, sizeof(test), out, sizeof(out), sizeof(test));

    /*
     * Rationale behind keeping output [formatted as below] is that
     * one should be able to redirect it to a file, then copy-n-paste
     * final "output val" from official example to another file, and
     * compare the two with diff(1).
     */
    for (i = 0; i < sizeof(out);) {
        printf("%02X", out[i]);
        i++;
        /* Space between bytes; newline after every 16th and the last. */
        fputs(i % 16 != 0 && i != sizeof(out) ? " " : "\n", stdout);
    }

    if (memcmp(out, result, sizeof(out)) != 0) {
        fprintf(stderr, "failure\n");
        return 1;
    } else {
        fprintf(stderr, "success\n");
        return 0;
    }
}
1253e1051a39Sopenharmony_ci#endif
1254