1e1051a39Sopenharmony_ci/* 2e1051a39Sopenharmony_ci * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved. 3e1051a39Sopenharmony_ci * 4e1051a39Sopenharmony_ci * Licensed under the Apache License 2.0 (the "License"). You may not use 5e1051a39Sopenharmony_ci * this file except in compliance with the License. You can obtain a copy 6e1051a39Sopenharmony_ci * in the file LICENSE in the source distribution or at 7e1051a39Sopenharmony_ci * https://www.openssl.org/source/license.html 8e1051a39Sopenharmony_ci */ 9e1051a39Sopenharmony_ci 10e1051a39Sopenharmony_ci#include <string.h> 11e1051a39Sopenharmony_ci#include <openssl/crypto.h> 12e1051a39Sopenharmony_ci#include "internal/cryptlib.h" 13e1051a39Sopenharmony_ci#include "internal/endian.h" 14e1051a39Sopenharmony_ci#include "crypto/modes.h" 15e1051a39Sopenharmony_ci 16e1051a39Sopenharmony_ci#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT) 17e1051a39Sopenharmony_citypedef size_t size_t_aX __attribute((__aligned__(1))); 18e1051a39Sopenharmony_ci#else 19e1051a39Sopenharmony_citypedef size_t size_t_aX; 20e1051a39Sopenharmony_ci#endif 21e1051a39Sopenharmony_ci 22e1051a39Sopenharmony_ci#if defined(BSWAP4) && defined(STRICT_ALIGNMENT) 23e1051a39Sopenharmony_ci/* redefine, because alignment is ensured */ 24e1051a39Sopenharmony_ci# undef GETU32 25e1051a39Sopenharmony_ci# define GETU32(p) BSWAP4(*(const u32 *)(p)) 26e1051a39Sopenharmony_ci# undef PUTU32 27e1051a39Sopenharmony_ci# define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v) 28e1051a39Sopenharmony_ci#endif 29e1051a39Sopenharmony_ci 30e1051a39Sopenharmony_ci#define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16)) 31e1051a39Sopenharmony_ci#define REDUCE1BIT(V) do { \ 32e1051a39Sopenharmony_ci if (sizeof(size_t)==8) { \ 33e1051a39Sopenharmony_ci u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \ 34e1051a39Sopenharmony_ci V.lo = (V.hi<<63)|(V.lo>>1); \ 35e1051a39Sopenharmony_ci V.hi = (V.hi>>1 )^T; \ 36e1051a39Sopenharmony_ci } \ 37e1051a39Sopenharmony_ci else { \ 
38e1051a39Sopenharmony_ci u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \ 39e1051a39Sopenharmony_ci V.lo = (V.hi<<63)|(V.lo>>1); \ 40e1051a39Sopenharmony_ci V.hi = (V.hi>>1 )^((u64)T<<32); \ 41e1051a39Sopenharmony_ci } \ 42e1051a39Sopenharmony_ci} while(0) 43e1051a39Sopenharmony_ci 44e1051a39Sopenharmony_ci/*- 45e1051a39Sopenharmony_ci * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should 46e1051a39Sopenharmony_ci * never be set to 8. 8 is effectively reserved for testing purposes. 47e1051a39Sopenharmony_ci * TABLE_BITS>1 are lookup-table-driven implementations referred to as 48e1051a39Sopenharmony_ci * "Shoup's" in GCM specification. In other words OpenSSL does not cover 49e1051a39Sopenharmony_ci * whole spectrum of possible table driven implementations. Why? In 50e1051a39Sopenharmony_ci * non-"Shoup's" case memory access pattern is segmented in such manner, 51e1051a39Sopenharmony_ci * that it's trivial to see that cache timing information can reveal 52e1051a39Sopenharmony_ci * fair portion of intermediate hash value. Given that ciphertext is 53e1051a39Sopenharmony_ci * always available to attacker, it's possible for him to attempt to 54e1051a39Sopenharmony_ci * deduce secret parameter H and if successful, tamper with messages 55e1051a39Sopenharmony_ci * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's 56e1051a39Sopenharmony_ci * not as trivial, but there is no reason to believe that it's resistant 57e1051a39Sopenharmony_ci * to cache-timing attack. And the thing about "8-bit" implementation is 58e1051a39Sopenharmony_ci * that it consumes 16 (sixteen) times more memory, 4KB per individual 59e1051a39Sopenharmony_ci * key + 1KB shared. Well, on pros side it should be twice as fast as 60e1051a39Sopenharmony_ci * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version 61e1051a39Sopenharmony_ci * was observed to run ~75% faster, closer to 100% for commercial 62e1051a39Sopenharmony_ci * compilers... 
 * Yet the "4-bit" procedure is preferred, because it's
 * believed to provide better security-performance balance and adequate
 * all-round performance. "All-round" refers to things like:
 *
 * - shorter setup time effectively improves overall timing for
 *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example, on Windows a large enough free
 *   results in VM working set trimming, meaning that a subsequent
 *   malloc would immediately incur working set expansion);
 * - larger table has larger cache footprint, which can affect
 *   performance of other code paths (not necessarily even from same
 *   thread in Hyper-Threading world);
 *
 * Value of 1 is not appropriate for performance reasons.
77e1051a39Sopenharmony_ci */ 78e1051a39Sopenharmony_ci#if TABLE_BITS==8 79e1051a39Sopenharmony_ci 80e1051a39Sopenharmony_cistatic void gcm_init_8bit(u128 Htable[256], u64 H[2]) 81e1051a39Sopenharmony_ci{ 82e1051a39Sopenharmony_ci int i, j; 83e1051a39Sopenharmony_ci u128 V; 84e1051a39Sopenharmony_ci 85e1051a39Sopenharmony_ci Htable[0].hi = 0; 86e1051a39Sopenharmony_ci Htable[0].lo = 0; 87e1051a39Sopenharmony_ci V.hi = H[0]; 88e1051a39Sopenharmony_ci V.lo = H[1]; 89e1051a39Sopenharmony_ci 90e1051a39Sopenharmony_ci for (Htable[128] = V, i = 64; i > 0; i >>= 1) { 91e1051a39Sopenharmony_ci REDUCE1BIT(V); 92e1051a39Sopenharmony_ci Htable[i] = V; 93e1051a39Sopenharmony_ci } 94e1051a39Sopenharmony_ci 95e1051a39Sopenharmony_ci for (i = 2; i < 256; i <<= 1) { 96e1051a39Sopenharmony_ci u128 *Hi = Htable + i, H0 = *Hi; 97e1051a39Sopenharmony_ci for (j = 1; j < i; ++j) { 98e1051a39Sopenharmony_ci Hi[j].hi = H0.hi ^ Htable[j].hi; 99e1051a39Sopenharmony_ci Hi[j].lo = H0.lo ^ Htable[j].lo; 100e1051a39Sopenharmony_ci } 101e1051a39Sopenharmony_ci } 102e1051a39Sopenharmony_ci} 103e1051a39Sopenharmony_ci 104e1051a39Sopenharmony_cistatic void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256]) 105e1051a39Sopenharmony_ci{ 106e1051a39Sopenharmony_ci u128 Z = { 0, 0 }; 107e1051a39Sopenharmony_ci const u8 *xi = (const u8 *)Xi + 15; 108e1051a39Sopenharmony_ci size_t rem, n = *xi; 109e1051a39Sopenharmony_ci DECLARE_IS_ENDIAN; 110e1051a39Sopenharmony_ci static const size_t rem_8bit[256] = { 111e1051a39Sopenharmony_ci PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246), 112e1051a39Sopenharmony_ci PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E), 113e1051a39Sopenharmony_ci PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56), 114e1051a39Sopenharmony_ci PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E), 115e1051a39Sopenharmony_ci PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66), 116e1051a39Sopenharmony_ci PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E), 
117e1051a39Sopenharmony_ci PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076), 118e1051a39Sopenharmony_ci PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E), 119e1051a39Sopenharmony_ci PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06), 120e1051a39Sopenharmony_ci PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E), 121e1051a39Sopenharmony_ci PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416), 122e1051a39Sopenharmony_ci PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E), 123e1051a39Sopenharmony_ci PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626), 124e1051a39Sopenharmony_ci PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E), 125e1051a39Sopenharmony_ci PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836), 126e1051a39Sopenharmony_ci PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E), 127e1051a39Sopenharmony_ci PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6), 128e1051a39Sopenharmony_ci PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE), 129e1051a39Sopenharmony_ci PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6), 130e1051a39Sopenharmony_ci PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE), 131e1051a39Sopenharmony_ci PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6), 132e1051a39Sopenharmony_ci PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE), 133e1051a39Sopenharmony_ci PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6), 134e1051a39Sopenharmony_ci PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE), 135e1051a39Sopenharmony_ci PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86), 136e1051a39Sopenharmony_ci PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E), 137e1051a39Sopenharmony_ci PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496), 138e1051a39Sopenharmony_ci PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E), 139e1051a39Sopenharmony_ci PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6), 140e1051a39Sopenharmony_ci PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE), 
141e1051a39Sopenharmony_ci PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6), 142e1051a39Sopenharmony_ci PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE), 143e1051a39Sopenharmony_ci PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346), 144e1051a39Sopenharmony_ci PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E), 145e1051a39Sopenharmony_ci PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56), 146e1051a39Sopenharmony_ci PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E), 147e1051a39Sopenharmony_ci PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66), 148e1051a39Sopenharmony_ci PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E), 149e1051a39Sopenharmony_ci PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176), 150e1051a39Sopenharmony_ci PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E), 151e1051a39Sopenharmony_ci PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06), 152e1051a39Sopenharmony_ci PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E), 153e1051a39Sopenharmony_ci PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516), 154e1051a39Sopenharmony_ci PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E), 155e1051a39Sopenharmony_ci PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726), 156e1051a39Sopenharmony_ci PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E), 157e1051a39Sopenharmony_ci PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936), 158e1051a39Sopenharmony_ci PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E), 159e1051a39Sopenharmony_ci PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6), 160e1051a39Sopenharmony_ci PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE), 161e1051a39Sopenharmony_ci PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6), 162e1051a39Sopenharmony_ci PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE), 163e1051a39Sopenharmony_ci PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6), 164e1051a39Sopenharmony_ci PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE), 
165e1051a39Sopenharmony_ci PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6), 166e1051a39Sopenharmony_ci PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE), 167e1051a39Sopenharmony_ci PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86), 168e1051a39Sopenharmony_ci PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E), 169e1051a39Sopenharmony_ci PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596), 170e1051a39Sopenharmony_ci PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E), 171e1051a39Sopenharmony_ci PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6), 172e1051a39Sopenharmony_ci PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE), 173e1051a39Sopenharmony_ci PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6), 174e1051a39Sopenharmony_ci PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) 175e1051a39Sopenharmony_ci }; 176e1051a39Sopenharmony_ci 177e1051a39Sopenharmony_ci while (1) { 178e1051a39Sopenharmony_ci Z.hi ^= Htable[n].hi; 179e1051a39Sopenharmony_ci Z.lo ^= Htable[n].lo; 180e1051a39Sopenharmony_ci 181e1051a39Sopenharmony_ci if ((u8 *)Xi == xi) 182e1051a39Sopenharmony_ci break; 183e1051a39Sopenharmony_ci 184e1051a39Sopenharmony_ci n = *(--xi); 185e1051a39Sopenharmony_ci 186e1051a39Sopenharmony_ci rem = (size_t)Z.lo & 0xff; 187e1051a39Sopenharmony_ci Z.lo = (Z.hi << 56) | (Z.lo >> 8); 188e1051a39Sopenharmony_ci Z.hi = (Z.hi >> 8); 189e1051a39Sopenharmony_ci if (sizeof(size_t) == 8) 190e1051a39Sopenharmony_ci Z.hi ^= rem_8bit[rem]; 191e1051a39Sopenharmony_ci else 192e1051a39Sopenharmony_ci Z.hi ^= (u64)rem_8bit[rem] << 32; 193e1051a39Sopenharmony_ci } 194e1051a39Sopenharmony_ci 195e1051a39Sopenharmony_ci if (IS_LITTLE_ENDIAN) { 196e1051a39Sopenharmony_ci# ifdef BSWAP8 197e1051a39Sopenharmony_ci Xi[0] = BSWAP8(Z.hi); 198e1051a39Sopenharmony_ci Xi[1] = BSWAP8(Z.lo); 199e1051a39Sopenharmony_ci# else 200e1051a39Sopenharmony_ci u8 *p = (u8 *)Xi; 201e1051a39Sopenharmony_ci u32 v; 202e1051a39Sopenharmony_ci v = (u32)(Z.hi >> 32); 
203e1051a39Sopenharmony_ci PUTU32(p, v); 204e1051a39Sopenharmony_ci v = (u32)(Z.hi); 205e1051a39Sopenharmony_ci PUTU32(p + 4, v); 206e1051a39Sopenharmony_ci v = (u32)(Z.lo >> 32); 207e1051a39Sopenharmony_ci PUTU32(p + 8, v); 208e1051a39Sopenharmony_ci v = (u32)(Z.lo); 209e1051a39Sopenharmony_ci PUTU32(p + 12, v); 210e1051a39Sopenharmony_ci# endif 211e1051a39Sopenharmony_ci } else { 212e1051a39Sopenharmony_ci Xi[0] = Z.hi; 213e1051a39Sopenharmony_ci Xi[1] = Z.lo; 214e1051a39Sopenharmony_ci } 215e1051a39Sopenharmony_ci} 216e1051a39Sopenharmony_ci 217e1051a39Sopenharmony_ci# define GCM_MUL(ctx) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable) 218e1051a39Sopenharmony_ci 219e1051a39Sopenharmony_ci#elif TABLE_BITS==4 220e1051a39Sopenharmony_ci 221e1051a39Sopenharmony_cistatic void gcm_init_4bit(u128 Htable[16], u64 H[2]) 222e1051a39Sopenharmony_ci{ 223e1051a39Sopenharmony_ci u128 V; 224e1051a39Sopenharmony_ci# if defined(OPENSSL_SMALL_FOOTPRINT) 225e1051a39Sopenharmony_ci int i; 226e1051a39Sopenharmony_ci# endif 227e1051a39Sopenharmony_ci 228e1051a39Sopenharmony_ci Htable[0].hi = 0; 229e1051a39Sopenharmony_ci Htable[0].lo = 0; 230e1051a39Sopenharmony_ci V.hi = H[0]; 231e1051a39Sopenharmony_ci V.lo = H[1]; 232e1051a39Sopenharmony_ci 233e1051a39Sopenharmony_ci# if defined(OPENSSL_SMALL_FOOTPRINT) 234e1051a39Sopenharmony_ci for (Htable[8] = V, i = 4; i > 0; i >>= 1) { 235e1051a39Sopenharmony_ci REDUCE1BIT(V); 236e1051a39Sopenharmony_ci Htable[i] = V; 237e1051a39Sopenharmony_ci } 238e1051a39Sopenharmony_ci 239e1051a39Sopenharmony_ci for (i = 2; i < 16; i <<= 1) { 240e1051a39Sopenharmony_ci u128 *Hi = Htable + i; 241e1051a39Sopenharmony_ci int j; 242e1051a39Sopenharmony_ci for (V = *Hi, j = 1; j < i; ++j) { 243e1051a39Sopenharmony_ci Hi[j].hi = V.hi ^ Htable[j].hi; 244e1051a39Sopenharmony_ci Hi[j].lo = V.lo ^ Htable[j].lo; 245e1051a39Sopenharmony_ci } 246e1051a39Sopenharmony_ci } 247e1051a39Sopenharmony_ci# else 248e1051a39Sopenharmony_ci Htable[8] = V; 249e1051a39Sopenharmony_ci 
REDUCE1BIT(V); 250e1051a39Sopenharmony_ci Htable[4] = V; 251e1051a39Sopenharmony_ci REDUCE1BIT(V); 252e1051a39Sopenharmony_ci Htable[2] = V; 253e1051a39Sopenharmony_ci REDUCE1BIT(V); 254e1051a39Sopenharmony_ci Htable[1] = V; 255e1051a39Sopenharmony_ci Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo; 256e1051a39Sopenharmony_ci V = Htable[4]; 257e1051a39Sopenharmony_ci Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo; 258e1051a39Sopenharmony_ci Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo; 259e1051a39Sopenharmony_ci Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo; 260e1051a39Sopenharmony_ci V = Htable[8]; 261e1051a39Sopenharmony_ci Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo; 262e1051a39Sopenharmony_ci Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo; 263e1051a39Sopenharmony_ci Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo; 264e1051a39Sopenharmony_ci Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo; 265e1051a39Sopenharmony_ci Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo; 266e1051a39Sopenharmony_ci Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo; 267e1051a39Sopenharmony_ci Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo; 268e1051a39Sopenharmony_ci# endif 269e1051a39Sopenharmony_ci# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm)) 270e1051a39Sopenharmony_ci /* 271e1051a39Sopenharmony_ci * ARM assembler expects specific dword order in Htable. 
272e1051a39Sopenharmony_ci */ 273e1051a39Sopenharmony_ci { 274e1051a39Sopenharmony_ci int j; 275e1051a39Sopenharmony_ci DECLARE_IS_ENDIAN; 276e1051a39Sopenharmony_ci 277e1051a39Sopenharmony_ci if (IS_LITTLE_ENDIAN) 278e1051a39Sopenharmony_ci for (j = 0; j < 16; ++j) { 279e1051a39Sopenharmony_ci V = Htable[j]; 280e1051a39Sopenharmony_ci Htable[j].hi = V.lo; 281e1051a39Sopenharmony_ci Htable[j].lo = V.hi; 282e1051a39Sopenharmony_ci } else 283e1051a39Sopenharmony_ci for (j = 0; j < 16; ++j) { 284e1051a39Sopenharmony_ci V = Htable[j]; 285e1051a39Sopenharmony_ci Htable[j].hi = V.lo << 32 | V.lo >> 32; 286e1051a39Sopenharmony_ci Htable[j].lo = V.hi << 32 | V.hi >> 32; 287e1051a39Sopenharmony_ci } 288e1051a39Sopenharmony_ci } 289e1051a39Sopenharmony_ci# endif 290e1051a39Sopenharmony_ci} 291e1051a39Sopenharmony_ci 292e1051a39Sopenharmony_ci# ifndef GHASH_ASM 293e1051a39Sopenharmony_cistatic const size_t rem_4bit[16] = { 294e1051a39Sopenharmony_ci PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460), 295e1051a39Sopenharmony_ci PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0), 296e1051a39Sopenharmony_ci PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560), 297e1051a39Sopenharmony_ci PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) 298e1051a39Sopenharmony_ci}; 299e1051a39Sopenharmony_ci 300e1051a39Sopenharmony_cistatic void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]) 301e1051a39Sopenharmony_ci{ 302e1051a39Sopenharmony_ci u128 Z; 303e1051a39Sopenharmony_ci int cnt = 15; 304e1051a39Sopenharmony_ci size_t rem, nlo, nhi; 305e1051a39Sopenharmony_ci DECLARE_IS_ENDIAN; 306e1051a39Sopenharmony_ci 307e1051a39Sopenharmony_ci nlo = ((const u8 *)Xi)[15]; 308e1051a39Sopenharmony_ci nhi = nlo >> 4; 309e1051a39Sopenharmony_ci nlo &= 0xf; 310e1051a39Sopenharmony_ci 311e1051a39Sopenharmony_ci Z.hi = Htable[nlo].hi; 312e1051a39Sopenharmony_ci Z.lo = Htable[nlo].lo; 313e1051a39Sopenharmony_ci 314e1051a39Sopenharmony_ci while (1) { 315e1051a39Sopenharmony_ci rem = 
(size_t)Z.lo & 0xf; 316e1051a39Sopenharmony_ci Z.lo = (Z.hi << 60) | (Z.lo >> 4); 317e1051a39Sopenharmony_ci Z.hi = (Z.hi >> 4); 318e1051a39Sopenharmony_ci if (sizeof(size_t) == 8) 319e1051a39Sopenharmony_ci Z.hi ^= rem_4bit[rem]; 320e1051a39Sopenharmony_ci else 321e1051a39Sopenharmony_ci Z.hi ^= (u64)rem_4bit[rem] << 32; 322e1051a39Sopenharmony_ci 323e1051a39Sopenharmony_ci Z.hi ^= Htable[nhi].hi; 324e1051a39Sopenharmony_ci Z.lo ^= Htable[nhi].lo; 325e1051a39Sopenharmony_ci 326e1051a39Sopenharmony_ci if (--cnt < 0) 327e1051a39Sopenharmony_ci break; 328e1051a39Sopenharmony_ci 329e1051a39Sopenharmony_ci nlo = ((const u8 *)Xi)[cnt]; 330e1051a39Sopenharmony_ci nhi = nlo >> 4; 331e1051a39Sopenharmony_ci nlo &= 0xf; 332e1051a39Sopenharmony_ci 333e1051a39Sopenharmony_ci rem = (size_t)Z.lo & 0xf; 334e1051a39Sopenharmony_ci Z.lo = (Z.hi << 60) | (Z.lo >> 4); 335e1051a39Sopenharmony_ci Z.hi = (Z.hi >> 4); 336e1051a39Sopenharmony_ci if (sizeof(size_t) == 8) 337e1051a39Sopenharmony_ci Z.hi ^= rem_4bit[rem]; 338e1051a39Sopenharmony_ci else 339e1051a39Sopenharmony_ci Z.hi ^= (u64)rem_4bit[rem] << 32; 340e1051a39Sopenharmony_ci 341e1051a39Sopenharmony_ci Z.hi ^= Htable[nlo].hi; 342e1051a39Sopenharmony_ci Z.lo ^= Htable[nlo].lo; 343e1051a39Sopenharmony_ci } 344e1051a39Sopenharmony_ci 345e1051a39Sopenharmony_ci if (IS_LITTLE_ENDIAN) { 346e1051a39Sopenharmony_ci# ifdef BSWAP8 347e1051a39Sopenharmony_ci Xi[0] = BSWAP8(Z.hi); 348e1051a39Sopenharmony_ci Xi[1] = BSWAP8(Z.lo); 349e1051a39Sopenharmony_ci# else 350e1051a39Sopenharmony_ci u8 *p = (u8 *)Xi; 351e1051a39Sopenharmony_ci u32 v; 352e1051a39Sopenharmony_ci v = (u32)(Z.hi >> 32); 353e1051a39Sopenharmony_ci PUTU32(p, v); 354e1051a39Sopenharmony_ci v = (u32)(Z.hi); 355e1051a39Sopenharmony_ci PUTU32(p + 4, v); 356e1051a39Sopenharmony_ci v = (u32)(Z.lo >> 32); 357e1051a39Sopenharmony_ci PUTU32(p + 8, v); 358e1051a39Sopenharmony_ci v = (u32)(Z.lo); 359e1051a39Sopenharmony_ci PUTU32(p + 12, v); 360e1051a39Sopenharmony_ci# endif 
361e1051a39Sopenharmony_ci } else { 362e1051a39Sopenharmony_ci Xi[0] = Z.hi; 363e1051a39Sopenharmony_ci Xi[1] = Z.lo; 364e1051a39Sopenharmony_ci } 365e1051a39Sopenharmony_ci} 366e1051a39Sopenharmony_ci 367e1051a39Sopenharmony_ci# if !defined(OPENSSL_SMALL_FOOTPRINT) 368e1051a39Sopenharmony_ci/* 369e1051a39Sopenharmony_ci * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for 370e1051a39Sopenharmony_ci * details... Compiler-generated code doesn't seem to give any 371e1051a39Sopenharmony_ci * performance improvement, at least not on x86[_64]. It's here 372e1051a39Sopenharmony_ci * mostly as reference and a placeholder for possible future 373e1051a39Sopenharmony_ci * non-trivial optimization[s]... 374e1051a39Sopenharmony_ci */ 375e1051a39Sopenharmony_cistatic void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], 376e1051a39Sopenharmony_ci const u8 *inp, size_t len) 377e1051a39Sopenharmony_ci{ 378e1051a39Sopenharmony_ci u128 Z; 379e1051a39Sopenharmony_ci int cnt; 380e1051a39Sopenharmony_ci size_t rem, nlo, nhi; 381e1051a39Sopenharmony_ci DECLARE_IS_ENDIAN; 382e1051a39Sopenharmony_ci 383e1051a39Sopenharmony_ci# if 1 384e1051a39Sopenharmony_ci do { 385e1051a39Sopenharmony_ci cnt = 15; 386e1051a39Sopenharmony_ci nlo = ((const u8 *)Xi)[15]; 387e1051a39Sopenharmony_ci nlo ^= inp[15]; 388e1051a39Sopenharmony_ci nhi = nlo >> 4; 389e1051a39Sopenharmony_ci nlo &= 0xf; 390e1051a39Sopenharmony_ci 391e1051a39Sopenharmony_ci Z.hi = Htable[nlo].hi; 392e1051a39Sopenharmony_ci Z.lo = Htable[nlo].lo; 393e1051a39Sopenharmony_ci 394e1051a39Sopenharmony_ci while (1) { 395e1051a39Sopenharmony_ci rem = (size_t)Z.lo & 0xf; 396e1051a39Sopenharmony_ci Z.lo = (Z.hi << 60) | (Z.lo >> 4); 397e1051a39Sopenharmony_ci Z.hi = (Z.hi >> 4); 398e1051a39Sopenharmony_ci if (sizeof(size_t) == 8) 399e1051a39Sopenharmony_ci Z.hi ^= rem_4bit[rem]; 400e1051a39Sopenharmony_ci else 401e1051a39Sopenharmony_ci Z.hi ^= (u64)rem_4bit[rem] << 32; 402e1051a39Sopenharmony_ci 403e1051a39Sopenharmony_ci Z.hi ^= 
Htable[nhi].hi; 404e1051a39Sopenharmony_ci Z.lo ^= Htable[nhi].lo; 405e1051a39Sopenharmony_ci 406e1051a39Sopenharmony_ci if (--cnt < 0) 407e1051a39Sopenharmony_ci break; 408e1051a39Sopenharmony_ci 409e1051a39Sopenharmony_ci nlo = ((const u8 *)Xi)[cnt]; 410e1051a39Sopenharmony_ci nlo ^= inp[cnt]; 411e1051a39Sopenharmony_ci nhi = nlo >> 4; 412e1051a39Sopenharmony_ci nlo &= 0xf; 413e1051a39Sopenharmony_ci 414e1051a39Sopenharmony_ci rem = (size_t)Z.lo & 0xf; 415e1051a39Sopenharmony_ci Z.lo = (Z.hi << 60) | (Z.lo >> 4); 416e1051a39Sopenharmony_ci Z.hi = (Z.hi >> 4); 417e1051a39Sopenharmony_ci if (sizeof(size_t) == 8) 418e1051a39Sopenharmony_ci Z.hi ^= rem_4bit[rem]; 419e1051a39Sopenharmony_ci else 420e1051a39Sopenharmony_ci Z.hi ^= (u64)rem_4bit[rem] << 32; 421e1051a39Sopenharmony_ci 422e1051a39Sopenharmony_ci Z.hi ^= Htable[nlo].hi; 423e1051a39Sopenharmony_ci Z.lo ^= Htable[nlo].lo; 424e1051a39Sopenharmony_ci } 425e1051a39Sopenharmony_ci# else 426e1051a39Sopenharmony_ci /* 427e1051a39Sopenharmony_ci * Extra 256+16 bytes per-key plus 512 bytes shared tables 428e1051a39Sopenharmony_ci * [should] give ~50% improvement... One could have PACK()-ed 429e1051a39Sopenharmony_ci * the rem_8bit even here, but the priority is to minimize 430e1051a39Sopenharmony_ci * cache footprint... 
431e1051a39Sopenharmony_ci */ 432e1051a39Sopenharmony_ci u128 Hshr4[16]; /* Htable shifted right by 4 bits */ 433e1051a39Sopenharmony_ci u8 Hshl4[16]; /* Htable shifted left by 4 bits */ 434e1051a39Sopenharmony_ci static const unsigned short rem_8bit[256] = { 435e1051a39Sopenharmony_ci 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E, 436e1051a39Sopenharmony_ci 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E, 437e1051a39Sopenharmony_ci 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E, 438e1051a39Sopenharmony_ci 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E, 439e1051a39Sopenharmony_ci 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E, 440e1051a39Sopenharmony_ci 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E, 441e1051a39Sopenharmony_ci 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E, 442e1051a39Sopenharmony_ci 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E, 443e1051a39Sopenharmony_ci 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE, 444e1051a39Sopenharmony_ci 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE, 445e1051a39Sopenharmony_ci 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE, 446e1051a39Sopenharmony_ci 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE, 447e1051a39Sopenharmony_ci 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E, 448e1051a39Sopenharmony_ci 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E, 449e1051a39Sopenharmony_ci 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE, 450e1051a39Sopenharmony_ci 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE, 451e1051a39Sopenharmony_ci 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E, 452e1051a39Sopenharmony_ci 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E, 453e1051a39Sopenharmony_ci 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E, 
454e1051a39Sopenharmony_ci 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E, 455e1051a39Sopenharmony_ci 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E, 456e1051a39Sopenharmony_ci 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E, 457e1051a39Sopenharmony_ci 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E, 458e1051a39Sopenharmony_ci 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E, 459e1051a39Sopenharmony_ci 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE, 460e1051a39Sopenharmony_ci 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE, 461e1051a39Sopenharmony_ci 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE, 462e1051a39Sopenharmony_ci 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE, 463e1051a39Sopenharmony_ci 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E, 464e1051a39Sopenharmony_ci 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E, 465e1051a39Sopenharmony_ci 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE, 466e1051a39Sopenharmony_ci 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE 467e1051a39Sopenharmony_ci }; 468e1051a39Sopenharmony_ci /* 469e1051a39Sopenharmony_ci * This pre-processing phase slows down procedure by approximately 470e1051a39Sopenharmony_ci * same time as it makes each loop spin faster. In other words 471e1051a39Sopenharmony_ci * single block performance is approximately same as straightforward 472e1051a39Sopenharmony_ci * "4-bit" implementation, and then it goes only faster... 
473e1051a39Sopenharmony_ci */ 474e1051a39Sopenharmony_ci for (cnt = 0; cnt < 16; ++cnt) { 475e1051a39Sopenharmony_ci Z.hi = Htable[cnt].hi; 476e1051a39Sopenharmony_ci Z.lo = Htable[cnt].lo; 477e1051a39Sopenharmony_ci Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4); 478e1051a39Sopenharmony_ci Hshr4[cnt].hi = (Z.hi >> 4); 479e1051a39Sopenharmony_ci Hshl4[cnt] = (u8)(Z.lo << 4); 480e1051a39Sopenharmony_ci } 481e1051a39Sopenharmony_ci 482e1051a39Sopenharmony_ci do { 483e1051a39Sopenharmony_ci for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) { 484e1051a39Sopenharmony_ci nlo = ((const u8 *)Xi)[cnt]; 485e1051a39Sopenharmony_ci nlo ^= inp[cnt]; 486e1051a39Sopenharmony_ci nhi = nlo >> 4; 487e1051a39Sopenharmony_ci nlo &= 0xf; 488e1051a39Sopenharmony_ci 489e1051a39Sopenharmony_ci Z.hi ^= Htable[nlo].hi; 490e1051a39Sopenharmony_ci Z.lo ^= Htable[nlo].lo; 491e1051a39Sopenharmony_ci 492e1051a39Sopenharmony_ci rem = (size_t)Z.lo & 0xff; 493e1051a39Sopenharmony_ci 494e1051a39Sopenharmony_ci Z.lo = (Z.hi << 56) | (Z.lo >> 8); 495e1051a39Sopenharmony_ci Z.hi = (Z.hi >> 8); 496e1051a39Sopenharmony_ci 497e1051a39Sopenharmony_ci Z.hi ^= Hshr4[nhi].hi; 498e1051a39Sopenharmony_ci Z.lo ^= Hshr4[nhi].lo; 499e1051a39Sopenharmony_ci Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48; 500e1051a39Sopenharmony_ci } 501e1051a39Sopenharmony_ci 502e1051a39Sopenharmony_ci nlo = ((const u8 *)Xi)[0]; 503e1051a39Sopenharmony_ci nlo ^= inp[0]; 504e1051a39Sopenharmony_ci nhi = nlo >> 4; 505e1051a39Sopenharmony_ci nlo &= 0xf; 506e1051a39Sopenharmony_ci 507e1051a39Sopenharmony_ci Z.hi ^= Htable[nlo].hi; 508e1051a39Sopenharmony_ci Z.lo ^= Htable[nlo].lo; 509e1051a39Sopenharmony_ci 510e1051a39Sopenharmony_ci rem = (size_t)Z.lo & 0xf; 511e1051a39Sopenharmony_ci 512e1051a39Sopenharmony_ci Z.lo = (Z.hi << 60) | (Z.lo >> 4); 513e1051a39Sopenharmony_ci Z.hi = (Z.hi >> 4); 514e1051a39Sopenharmony_ci 515e1051a39Sopenharmony_ci Z.hi ^= Htable[nhi].hi; 516e1051a39Sopenharmony_ci Z.lo ^= Htable[nhi].lo; 
517e1051a39Sopenharmony_ci Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48; 518e1051a39Sopenharmony_ci# endif 519e1051a39Sopenharmony_ci 520e1051a39Sopenharmony_ci if (IS_LITTLE_ENDIAN) { 521e1051a39Sopenharmony_ci# ifdef BSWAP8 522e1051a39Sopenharmony_ci Xi[0] = BSWAP8(Z.hi); 523e1051a39Sopenharmony_ci Xi[1] = BSWAP8(Z.lo); 524e1051a39Sopenharmony_ci# else 525e1051a39Sopenharmony_ci u8 *p = (u8 *)Xi; 526e1051a39Sopenharmony_ci u32 v; 527e1051a39Sopenharmony_ci v = (u32)(Z.hi >> 32); 528e1051a39Sopenharmony_ci PUTU32(p, v); 529e1051a39Sopenharmony_ci v = (u32)(Z.hi); 530e1051a39Sopenharmony_ci PUTU32(p + 4, v); 531e1051a39Sopenharmony_ci v = (u32)(Z.lo >> 32); 532e1051a39Sopenharmony_ci PUTU32(p + 8, v); 533e1051a39Sopenharmony_ci v = (u32)(Z.lo); 534e1051a39Sopenharmony_ci PUTU32(p + 12, v); 535e1051a39Sopenharmony_ci# endif 536e1051a39Sopenharmony_ci } else { 537e1051a39Sopenharmony_ci Xi[0] = Z.hi; 538e1051a39Sopenharmony_ci Xi[1] = Z.lo; 539e1051a39Sopenharmony_ci } 540e1051a39Sopenharmony_ci } while (inp += 16, len -= 16); 541e1051a39Sopenharmony_ci} 542e1051a39Sopenharmony_ci# endif 543e1051a39Sopenharmony_ci# else 544e1051a39Sopenharmony_civoid gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]); 545e1051a39Sopenharmony_civoid gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp, 546e1051a39Sopenharmony_ci size_t len); 547e1051a39Sopenharmony_ci# endif 548e1051a39Sopenharmony_ci 549e1051a39Sopenharmony_ci# define GCM_MUL(ctx) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable) 550e1051a39Sopenharmony_ci# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT) 551e1051a39Sopenharmony_ci# define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len) 552e1051a39Sopenharmony_ci/* 553e1051a39Sopenharmony_ci * GHASH_CHUNK is "stride parameter" missioned to mitigate cache trashing 554e1051a39Sopenharmony_ci * effect. In other words idea is to hash data while it's still in L1 cache 555e1051a39Sopenharmony_ci * after encryption pass... 
556e1051a39Sopenharmony_ci */ 557e1051a39Sopenharmony_ci# define GHASH_CHUNK (3*1024) 558e1051a39Sopenharmony_ci# endif 559e1051a39Sopenharmony_ci 560e1051a39Sopenharmony_ci#else /* TABLE_BITS */ 561e1051a39Sopenharmony_ci 562e1051a39Sopenharmony_cistatic void gcm_gmult_1bit(u64 Xi[2], const u64 H[2]) 563e1051a39Sopenharmony_ci{ 564e1051a39Sopenharmony_ci u128 V, Z = { 0, 0 }; 565e1051a39Sopenharmony_ci long X; 566e1051a39Sopenharmony_ci int i, j; 567e1051a39Sopenharmony_ci const long *xi = (const long *)Xi; 568e1051a39Sopenharmony_ci DECLARE_IS_ENDIAN; 569e1051a39Sopenharmony_ci 570e1051a39Sopenharmony_ci V.hi = H[0]; /* H is in host byte order, no byte swapping */ 571e1051a39Sopenharmony_ci V.lo = H[1]; 572e1051a39Sopenharmony_ci 573e1051a39Sopenharmony_ci for (j = 0; j < 16 / sizeof(long); ++j) { 574e1051a39Sopenharmony_ci if (IS_LITTLE_ENDIAN) { 575e1051a39Sopenharmony_ci if (sizeof(long) == 8) { 576e1051a39Sopenharmony_ci# ifdef BSWAP8 577e1051a39Sopenharmony_ci X = (long)(BSWAP8(xi[j])); 578e1051a39Sopenharmony_ci# else 579e1051a39Sopenharmony_ci const u8 *p = (const u8 *)(xi + j); 580e1051a39Sopenharmony_ci X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4)); 581e1051a39Sopenharmony_ci# endif 582e1051a39Sopenharmony_ci } else { 583e1051a39Sopenharmony_ci const u8 *p = (const u8 *)(xi + j); 584e1051a39Sopenharmony_ci X = (long)GETU32(p); 585e1051a39Sopenharmony_ci } 586e1051a39Sopenharmony_ci } else 587e1051a39Sopenharmony_ci X = xi[j]; 588e1051a39Sopenharmony_ci 589e1051a39Sopenharmony_ci for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) { 590e1051a39Sopenharmony_ci u64 M = (u64)(X >> (8 * sizeof(long) - 1)); 591e1051a39Sopenharmony_ci Z.hi ^= V.hi & M; 592e1051a39Sopenharmony_ci Z.lo ^= V.lo & M; 593e1051a39Sopenharmony_ci 594e1051a39Sopenharmony_ci REDUCE1BIT(V); 595e1051a39Sopenharmony_ci } 596e1051a39Sopenharmony_ci } 597e1051a39Sopenharmony_ci 598e1051a39Sopenharmony_ci if (IS_LITTLE_ENDIAN) { 599e1051a39Sopenharmony_ci# ifdef BSWAP8 
600e1051a39Sopenharmony_ci Xi[0] = BSWAP8(Z.hi); 601e1051a39Sopenharmony_ci Xi[1] = BSWAP8(Z.lo); 602e1051a39Sopenharmony_ci# else 603e1051a39Sopenharmony_ci u8 *p = (u8 *)Xi; 604e1051a39Sopenharmony_ci u32 v; 605e1051a39Sopenharmony_ci v = (u32)(Z.hi >> 32); 606e1051a39Sopenharmony_ci PUTU32(p, v); 607e1051a39Sopenharmony_ci v = (u32)(Z.hi); 608e1051a39Sopenharmony_ci PUTU32(p + 4, v); 609e1051a39Sopenharmony_ci v = (u32)(Z.lo >> 32); 610e1051a39Sopenharmony_ci PUTU32(p + 8, v); 611e1051a39Sopenharmony_ci v = (u32)(Z.lo); 612e1051a39Sopenharmony_ci PUTU32(p + 12, v); 613e1051a39Sopenharmony_ci# endif 614e1051a39Sopenharmony_ci } else { 615e1051a39Sopenharmony_ci Xi[0] = Z.hi; 616e1051a39Sopenharmony_ci Xi[1] = Z.lo; 617e1051a39Sopenharmony_ci } 618e1051a39Sopenharmony_ci} 619e1051a39Sopenharmony_ci 620e1051a39Sopenharmony_ci# define GCM_MUL(ctx) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u) 621e1051a39Sopenharmony_ci 622e1051a39Sopenharmony_ci#endif 623e1051a39Sopenharmony_ci 624e1051a39Sopenharmony_ci#if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ)) 625e1051a39Sopenharmony_ci# if !defined(I386_ONLY) && \ 626e1051a39Sopenharmony_ci (defined(__i386) || defined(__i386__) || \ 627e1051a39Sopenharmony_ci defined(__x86_64) || defined(__x86_64__) || \ 628e1051a39Sopenharmony_ci defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) 629e1051a39Sopenharmony_ci# define GHASH_ASM_X86_OR_64 630e1051a39Sopenharmony_ci# define GCM_FUNCREF_4BIT 631e1051a39Sopenharmony_ci 632e1051a39Sopenharmony_civoid gcm_init_clmul(u128 Htable[16], const u64 Xi[2]); 633e1051a39Sopenharmony_civoid gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]); 634e1051a39Sopenharmony_civoid gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp, 635e1051a39Sopenharmony_ci size_t len); 636e1051a39Sopenharmony_ci 637e1051a39Sopenharmony_ci# if defined(__i386) || defined(__i386__) || defined(_M_IX86) 638e1051a39Sopenharmony_ci# define gcm_init_avx gcm_init_clmul 
639e1051a39Sopenharmony_ci# define gcm_gmult_avx gcm_gmult_clmul 640e1051a39Sopenharmony_ci# define gcm_ghash_avx gcm_ghash_clmul 641e1051a39Sopenharmony_ci# else 642e1051a39Sopenharmony_civoid gcm_init_avx(u128 Htable[16], const u64 Xi[2]); 643e1051a39Sopenharmony_civoid gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]); 644e1051a39Sopenharmony_civoid gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp, 645e1051a39Sopenharmony_ci size_t len); 646e1051a39Sopenharmony_ci# endif 647e1051a39Sopenharmony_ci 648e1051a39Sopenharmony_ci# if defined(__i386) || defined(__i386__) || defined(_M_IX86) 649e1051a39Sopenharmony_ci# define GHASH_ASM_X86 650e1051a39Sopenharmony_civoid gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]); 651e1051a39Sopenharmony_civoid gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp, 652e1051a39Sopenharmony_ci size_t len); 653e1051a39Sopenharmony_ci 654e1051a39Sopenharmony_civoid gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]); 655e1051a39Sopenharmony_civoid gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp, 656e1051a39Sopenharmony_ci size_t len); 657e1051a39Sopenharmony_ci# endif 658e1051a39Sopenharmony_ci# elif defined(__arm__) || defined(__arm) || defined(__aarch64__) 659e1051a39Sopenharmony_ci# include "arm_arch.h" 660e1051a39Sopenharmony_ci# if __ARM_MAX_ARCH__>=7 661e1051a39Sopenharmony_ci# define GHASH_ASM_ARM 662e1051a39Sopenharmony_ci# define GCM_FUNCREF_4BIT 663e1051a39Sopenharmony_ci# define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL) 664e1051a39Sopenharmony_ci# if defined(__arm__) || defined(__arm) 665e1051a39Sopenharmony_ci# define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON) 666e1051a39Sopenharmony_ci# endif 667e1051a39Sopenharmony_civoid gcm_init_neon(u128 Htable[16], const u64 Xi[2]); 668e1051a39Sopenharmony_civoid gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]); 669e1051a39Sopenharmony_civoid gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp, 
670e1051a39Sopenharmony_ci size_t len); 671e1051a39Sopenharmony_civoid gcm_init_v8(u128 Htable[16], const u64 Xi[2]); 672e1051a39Sopenharmony_civoid gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]); 673e1051a39Sopenharmony_civoid gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp, 674e1051a39Sopenharmony_ci size_t len); 675e1051a39Sopenharmony_ci# endif 676e1051a39Sopenharmony_ci# elif defined(__sparc__) || defined(__sparc) 677e1051a39Sopenharmony_ci# include "crypto/sparc_arch.h" 678e1051a39Sopenharmony_ci# define GHASH_ASM_SPARC 679e1051a39Sopenharmony_ci# define GCM_FUNCREF_4BIT 680e1051a39Sopenharmony_civoid gcm_init_vis3(u128 Htable[16], const u64 Xi[2]); 681e1051a39Sopenharmony_civoid gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]); 682e1051a39Sopenharmony_civoid gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp, 683e1051a39Sopenharmony_ci size_t len); 684e1051a39Sopenharmony_ci# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC)) 685e1051a39Sopenharmony_ci# include "crypto/ppc_arch.h" 686e1051a39Sopenharmony_ci# define GHASH_ASM_PPC 687e1051a39Sopenharmony_ci# define GCM_FUNCREF_4BIT 688e1051a39Sopenharmony_civoid gcm_init_p8(u128 Htable[16], const u64 Xi[2]); 689e1051a39Sopenharmony_civoid gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]); 690e1051a39Sopenharmony_civoid gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp, 691e1051a39Sopenharmony_ci size_t len); 692e1051a39Sopenharmony_ci# endif 693e1051a39Sopenharmony_ci#endif 694e1051a39Sopenharmony_ci 695e1051a39Sopenharmony_ci#ifdef GCM_FUNCREF_4BIT 696e1051a39Sopenharmony_ci# undef GCM_MUL 697e1051a39Sopenharmony_ci# define GCM_MUL(ctx) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable) 698e1051a39Sopenharmony_ci# ifdef GHASH 699e1051a39Sopenharmony_ci# undef GHASH 700e1051a39Sopenharmony_ci# define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len) 701e1051a39Sopenharmony_ci# endif 702e1051a39Sopenharmony_ci#endif 

/*
 * Initialise a GCM context for the given block cipher and key schedule.
 *
 * ctx   - context to set up; it is zeroed first, so any previous state is lost
 * key   - opaque key schedule, stored in ctx->key and passed to |block|
 * block - single-block encryption routine, stored in ctx->block
 *
 * The hash key H is obtained by running |block| over the (just zeroed) H
 * buffer in place, then converted to host byte order on little-endian hosts.
 * Depending on TABLE_BITS and the assembler/CPU-capability macros, the
 * matching table initialiser is called and, for TABLE_BITS==4 builds with
 * runtime dispatch, ctx->gmult / ctx->ghash are pointed at the selected
 * implementation.
 */
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
{
    DECLARE_IS_ENDIAN;

    memset(ctx, 0, sizeof(*ctx));
    ctx->block = block;
    ctx->key = key;

    /* ctx->H is all zero here, so this leaves the encrypted zero block in H */
    (*block) (ctx->H.c, ctx->H.c, key);

    if (IS_LITTLE_ENDIAN) {
        /* H is stored in host byte order */
#ifdef BSWAP8
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
        u8 *p = ctx->H.c;
        u64 hi, lo;
        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
        ctx->H.u[0] = hi;
        ctx->H.u[1] = lo;
#endif
    }
#if     TABLE_BITS==8
    gcm_init_8bit(ctx->Htable, ctx->H.u);
#elif   TABLE_BITS==4
    /*
     * CTX__GHASH records the bulk-hash routine only when the GHASH macro
     * exists in this build; otherwise ctx->ghash is set to NULL.
     */
# if    defined(GHASH)
#  define CTX__GHASH(f) (ctx->ghash = (f))
# else
#  define CTX__GHASH(f) (ctx->ghash = NULL)
# endif
# if    defined(GHASH_ASM_X86_OR_64)
#  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
            gcm_init_avx(ctx->Htable, ctx->H.u);
            ctx->gmult = gcm_gmult_avx;
            CTX__GHASH(gcm_ghash_avx);
        } else {
            gcm_init_clmul(ctx->Htable, ctx->H.u);
            ctx->gmult = gcm_gmult_clmul;
            CTX__GHASH(gcm_ghash_clmul);
        }
        /* carry-less-multiply path chosen: skip the 4-bit table set-up below */
        return;
    }
#  endif
    gcm_init_4bit(ctx->Htable, ctx->H.u);
#  if   defined(GHASH_ASM_X86)  /* x86 only */
#   if  defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
#   else
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
#   endif
        ctx->gmult = gcm_gmult_4bit_mmx;
        CTX__GHASH(gcm_ghash_4bit_mmx);
    } else {
        ctx->gmult = gcm_gmult_4bit_x86;
        CTX__GHASH(gcm_ghash_4bit_x86);
    }
#  else
    ctx->gmult = gcm_gmult_4bit;
    CTX__GHASH(gcm_ghash_4bit);
#  endif
# elif  defined(GHASH_ASM_ARM)
    /*
     * The two capability tests below chain through their dangling "else";
     * the final braced block is the fallback when neither applies.
     */
#  ifdef PMULL_CAPABLE
    if (PMULL_CAPABLE) {
        gcm_init_v8(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_v8;
        CTX__GHASH(gcm_ghash_v8);
    } else
#  endif
#  ifdef NEON_CAPABLE
    if (NEON_CAPABLE) {
        gcm_init_neon(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_neon;
        CTX__GHASH(gcm_ghash_neon);
    } else
#  endif
    {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        CTX__GHASH(gcm_ghash_4bit);
    }
# elif  defined(GHASH_ASM_SPARC)
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
        gcm_init_vis3(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_vis3;
        CTX__GHASH(gcm_ghash_vis3);
    } else {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        CTX__GHASH(gcm_ghash_4bit);
    }
# elif  defined(GHASH_ASM_PPC)
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
        gcm_init_p8(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_p8;
        CTX__GHASH(gcm_ghash_p8);
    } else {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        CTX__GHASH(gcm_ghash_4bit);
    }
# else
    /*
     * No runtime dispatch in this configuration: ctx->gmult and ctx->ghash
     * are not assigned here and keep the zero value from the memset above.
     */
    gcm_init_4bit(ctx->Htable, ctx->H.u);
# endif
# undef CTX__GHASH
#endif
}

/*
 * Start a new message under an already initialised context.
 *
 * ctx - context previously set up by CRYPTO_gcm128_init()
 * iv  - initialisation vector
 * len - length of |iv| in bytes
 *
 * Resets the AAD/message length counters and the partial-block counters,
 * derives the initial counter block Yi from the IV, clears the hash state
 * Xi, stores the encryption of Yi in ctx->EK0 and leaves Yi holding the
 * incremented counter.
 */
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
                         size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
    /* GCM_MUL expands to a call through this local when dispatch is enabled */
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

    ctx->len.u[0] = 0;          /* AAD length */
    ctx->len.u[1] = 0;          /* message length */
    ctx->ares = 0;
    ctx->mres = 0;

    if (len == 12) {
        /* 12-byte IV: Yi is the IV followed by a 32-bit big-endian 1 */
        memcpy(ctx->Yi.c, iv, 12);
        ctx->Yi.c[12] = 0;
        ctx->Yi.c[13] = 0;
        ctx->Yi.c[14] = 0;
        ctx->Yi.c[15] = 1;
        ctr = 1;
    } else {
        size_t i;
        u64 len0 = len;

        /* Borrow ctx->Xi to calculate initial Yi */
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;

        /* hash every full 16-byte block of the IV */
        while (len >= 16) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
            iv += 16;
            len -= 16;
        }
        /* trailing partial block: only |len| bytes are XORed in */
        if (len) {
            for (i = 0; i < len; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
        }
        /* IV length in bits, XORed big-endian into bytes 8..15 of Xi */
        len0 <<= 3;
        if (IS_LITTLE_ENDIAN) {
#ifdef BSWAP8
            ctx->Xi.u[1] ^= BSWAP8(len0);
#else
            ctx->Xi.c[8] ^= (u8)(len0 >> 56);
            ctx->Xi.c[9] ^= (u8)(len0 >> 48);
            ctx->Xi.c[10] ^= (u8)(len0 >> 40);
            ctx->Xi.c[11] ^= (u8)(len0 >> 32);
            ctx->Xi.c[12] ^= (u8)(len0 >> 24);
            ctx->Xi.c[13] ^= (u8)(len0 >> 16);
            ctx->Xi.c[14] ^= (u8)(len0 >> 8);
            ctx->Xi.c[15] ^= (u8)(len0);
#endif
        } else {
            ctx->Xi.u[1] ^= len0;
        }

        GCM_MUL(ctx);

        /* the counter is the last 32 bits of the result, read big-endian */
        if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
            ctr = BSWAP4(ctx->Xi.d[3]);
#else
            ctr = GETU32(ctx->Xi.c + 12);
#endif
        else
            ctr = ctx->Xi.d[3];

        /* Copy borrowed Xi to Yi */
        ctx->Yi.u[0] = ctx->Xi.u[0];
        ctx->Yi.u[1] = ctx->Xi.u[1];
    }

    ctx->Xi.u[0] = 0;
    ctx->Xi.u[1] = 0;

    /* EK0 is the encryption of the initial counter block */
    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
    /* store the incremented counter back into Yi, big-endian */
    ++ctr;
    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c + 12, ctr);
#endif
    else
        ctx->Yi.d[3] = ctr;
}

/*
 * Feed additional authenticated data into the hash state.
 *
 * ctx - context after CRYPTO_gcm128_setiv()
 * aad - data to authenticate
 * len - length of |aad| in bytes
 *
 * May be called repeatedly; ctx->ares carries the number of bytes already
 * XORed into a not-yet-multiplied block between calls.
 *
 * Returns 0 on success, -2 if message data has already been processed
 * (ctx->len.u[1] is non-zero), and -1 if the accumulated AAD length would
 * exceed 2^61 bytes or wrap around.
 */
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
                      size_t len)
{
    size_t i;
    unsigned int n;
    u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
    /* GCM_MUL / GHASH expand to calls through these locals */
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    if (ctx->len.u[1])
        return -2;

    alen += len;
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
        return -1;
    ctx->len.u[0] = alen;

    n = ctx->ares;
    if (n) {
        /* top up the partial block left by a previous call */
        while (n && len) {
            ctx->Xi.c[n] ^= *(aad++);
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0)
            GCM_MUL(ctx);
        else {
            /* input exhausted before the block filled up */
            ctx->ares = n;
            return 0;
        }
    }
#ifdef GHASH
    /* hash all remaining whole blocks in one call */
    if ((i = (len & (size_t)-16))) {
        GHASH(ctx, aad, i);
        aad += i;
        len -= i;
    }
#else
    while (len >= 16) {
        for (i = 0; i < 16; ++i)
            ctx->Xi.c[i] ^= aad[i];
        GCM_MUL(ctx);
        aad += 16;
        len -= 16;
    }
#endif
    if (len) {
        /* leftover bytes are XORed in now; the multiply is deferred */
        n = (unsigned int)len;
        for (i = 0; i < len; ++i)
            ctx->Xi.c[i] ^= aad[i];
    }

    ctx->ares = n;
    return 0;
}

/*
 * Encrypt |len| bytes from |in| to |out| in counter mode and fold the
 * produced ciphertext into the authentication hash.
 *
 * ctx - context after CRYPTO_gcm128_setiv() (and optional AAD)
 * in  - plaintext
 * out - receives the ciphertext
 * len - number of bytes to process
 *
 * May be called repeatedly.  The 32-bit counter lives big-endian in the last
 * four bytes of ctx->Yi and ctx->EKi holds the current keystream block.  In
 * builds where GHASH is defined and OPENSSL_SMALL_FOOTPRINT is not,
 * ciphertext bytes that do not yet form whole blocks are collected in
 * ctx->Xn and ctx->mres counts the buffered bytes; otherwise they are XORed
 * straight into ctx->Xi and ctx->mres is the offset inside the current block.
 *
 * Returns 0 on success, or -1 if the accumulated message length would
 * exceed 2^36 - 32 bytes or wrap around.
 */
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    /* GCM_MUL / GHASH expand to calls through these locals */
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        /*
         * Defer the multiply: move the pending Xi into the Xn buffer and
         * clear Xi, so the next GHASH call over Xn folds it in.
         */
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    /* fetch the current counter value from Yi */
    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    /* n: offset into the keystream block EKi left over from the last call */
    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        /* do { } while (0) so "break" can drop to the byte-wise loop below */
        do {
            if (n) {
                /* use up the rest of the current keystream block first */
# if defined(GHASH)
                while (n && len) {
                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            /* word-wise XOR below needs size_t-aligned in and out */
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            /* flush buffered bytes before hashing straight from |out| */
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            /* encrypt one GHASH_CHUNK, then hash the ciphertext just written */
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
                len -= GHASH_CHUNK;
            }
#  endif
            /* remaining whole blocks: encrypt them all, then hash in one go */
            if ((i = (len & (size_t)-16))) {
                size_t j = i;

                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
                GHASH(ctx, out - j, j);
            }
# else
            /* no bulk GHASH: XOR each ciphertext block into Xi and multiply */
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i)
                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                /* final partial block: new keystream block, hashing deferred */
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    /*
     * Byte-at-a-time path: the only path with OPENSSL_SMALL_FOOTPRINT, and
     * the fallback after the alignment "break" above.
     */
    for (i = 0; i < len; ++i) {
        if (n == 0) {
            /* keystream block used up: generate the next one */
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        /* hash the buffer once it is completely full */
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
            mres = 0;
        }
#else
        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}

int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len)
= ctx->ghash; 1213e1051a39Sopenharmony_ci# endif 1214e1051a39Sopenharmony_ci#endif 1215e1051a39Sopenharmony_ci 1216e1051a39Sopenharmony_ci mlen += len; 1217e1051a39Sopenharmony_ci if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) 1218e1051a39Sopenharmony_ci return -1; 1219e1051a39Sopenharmony_ci ctx->len.u[1] = mlen; 1220e1051a39Sopenharmony_ci 1221e1051a39Sopenharmony_ci mres = ctx->mres; 1222e1051a39Sopenharmony_ci 1223e1051a39Sopenharmony_ci if (ctx->ares) { 1224e1051a39Sopenharmony_ci /* First call to decrypt finalizes GHASH(AAD) */ 1225e1051a39Sopenharmony_ci#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 1226e1051a39Sopenharmony_ci if (len == 0) { 1227e1051a39Sopenharmony_ci GCM_MUL(ctx); 1228e1051a39Sopenharmony_ci ctx->ares = 0; 1229e1051a39Sopenharmony_ci return 0; 1230e1051a39Sopenharmony_ci } 1231e1051a39Sopenharmony_ci memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi)); 1232e1051a39Sopenharmony_ci ctx->Xi.u[0] = 0; 1233e1051a39Sopenharmony_ci ctx->Xi.u[1] = 0; 1234e1051a39Sopenharmony_ci mres = sizeof(ctx->Xi); 1235e1051a39Sopenharmony_ci#else 1236e1051a39Sopenharmony_ci GCM_MUL(ctx); 1237e1051a39Sopenharmony_ci#endif 1238e1051a39Sopenharmony_ci ctx->ares = 0; 1239e1051a39Sopenharmony_ci } 1240e1051a39Sopenharmony_ci 1241e1051a39Sopenharmony_ci if (IS_LITTLE_ENDIAN) 1242e1051a39Sopenharmony_ci#ifdef BSWAP4 1243e1051a39Sopenharmony_ci ctr = BSWAP4(ctx->Yi.d[3]); 1244e1051a39Sopenharmony_ci#else 1245e1051a39Sopenharmony_ci ctr = GETU32(ctx->Yi.c + 12); 1246e1051a39Sopenharmony_ci#endif 1247e1051a39Sopenharmony_ci else 1248e1051a39Sopenharmony_ci ctr = ctx->Yi.d[3]; 1249e1051a39Sopenharmony_ci 1250e1051a39Sopenharmony_ci n = mres % 16; 1251e1051a39Sopenharmony_ci#if !defined(OPENSSL_SMALL_FOOTPRINT) 1252e1051a39Sopenharmony_ci if (16 % sizeof(size_t) == 0) { /* always true actually */ 1253e1051a39Sopenharmony_ci do { 1254e1051a39Sopenharmony_ci if (n) { 1255e1051a39Sopenharmony_ci# if defined(GHASH) 1256e1051a39Sopenharmony_ci while (n 
&& len) { 1257e1051a39Sopenharmony_ci *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n]; 1258e1051a39Sopenharmony_ci --len; 1259e1051a39Sopenharmony_ci n = (n + 1) % 16; 1260e1051a39Sopenharmony_ci } 1261e1051a39Sopenharmony_ci if (n == 0) { 1262e1051a39Sopenharmony_ci GHASH(ctx, ctx->Xn, mres); 1263e1051a39Sopenharmony_ci mres = 0; 1264e1051a39Sopenharmony_ci } else { 1265e1051a39Sopenharmony_ci ctx->mres = mres; 1266e1051a39Sopenharmony_ci return 0; 1267e1051a39Sopenharmony_ci } 1268e1051a39Sopenharmony_ci# else 1269e1051a39Sopenharmony_ci while (n && len) { 1270e1051a39Sopenharmony_ci u8 c = *(in++); 1271e1051a39Sopenharmony_ci *(out++) = c ^ ctx->EKi.c[n]; 1272e1051a39Sopenharmony_ci ctx->Xi.c[n] ^= c; 1273e1051a39Sopenharmony_ci --len; 1274e1051a39Sopenharmony_ci n = (n + 1) % 16; 1275e1051a39Sopenharmony_ci } 1276e1051a39Sopenharmony_ci if (n == 0) { 1277e1051a39Sopenharmony_ci GCM_MUL(ctx); 1278e1051a39Sopenharmony_ci mres = 0; 1279e1051a39Sopenharmony_ci } else { 1280e1051a39Sopenharmony_ci ctx->mres = n; 1281e1051a39Sopenharmony_ci return 0; 1282e1051a39Sopenharmony_ci } 1283e1051a39Sopenharmony_ci# endif 1284e1051a39Sopenharmony_ci } 1285e1051a39Sopenharmony_ci# if defined(STRICT_ALIGNMENT) 1286e1051a39Sopenharmony_ci if (((size_t)in | (size_t)out) % sizeof(size_t) != 0) 1287e1051a39Sopenharmony_ci break; 1288e1051a39Sopenharmony_ci# endif 1289e1051a39Sopenharmony_ci# if defined(GHASH) 1290e1051a39Sopenharmony_ci if (len >= 16 && mres) { 1291e1051a39Sopenharmony_ci GHASH(ctx, ctx->Xn, mres); 1292e1051a39Sopenharmony_ci mres = 0; 1293e1051a39Sopenharmony_ci } 1294e1051a39Sopenharmony_ci# if defined(GHASH_CHUNK) 1295e1051a39Sopenharmony_ci while (len >= GHASH_CHUNK) { 1296e1051a39Sopenharmony_ci size_t j = GHASH_CHUNK; 1297e1051a39Sopenharmony_ci 1298e1051a39Sopenharmony_ci GHASH(ctx, in, GHASH_CHUNK); 1299e1051a39Sopenharmony_ci while (j) { 1300e1051a39Sopenharmony_ci size_t_aX *out_t = (size_t_aX *)out; 1301e1051a39Sopenharmony_ci const size_t_aX 
*in_t = (const size_t_aX *)in; 1302e1051a39Sopenharmony_ci 1303e1051a39Sopenharmony_ci (*block) (ctx->Yi.c, ctx->EKi.c, key); 1304e1051a39Sopenharmony_ci ++ctr; 1305e1051a39Sopenharmony_ci if (IS_LITTLE_ENDIAN) 1306e1051a39Sopenharmony_ci# ifdef BSWAP4 1307e1051a39Sopenharmony_ci ctx->Yi.d[3] = BSWAP4(ctr); 1308e1051a39Sopenharmony_ci# else 1309e1051a39Sopenharmony_ci PUTU32(ctx->Yi.c + 12, ctr); 1310e1051a39Sopenharmony_ci# endif 1311e1051a39Sopenharmony_ci else 1312e1051a39Sopenharmony_ci ctx->Yi.d[3] = ctr; 1313e1051a39Sopenharmony_ci for (i = 0; i < 16 / sizeof(size_t); ++i) 1314e1051a39Sopenharmony_ci out_t[i] = in_t[i] ^ ctx->EKi.t[i]; 1315e1051a39Sopenharmony_ci out += 16; 1316e1051a39Sopenharmony_ci in += 16; 1317e1051a39Sopenharmony_ci j -= 16; 1318e1051a39Sopenharmony_ci } 1319e1051a39Sopenharmony_ci len -= GHASH_CHUNK; 1320e1051a39Sopenharmony_ci } 1321e1051a39Sopenharmony_ci# endif 1322e1051a39Sopenharmony_ci if ((i = (len & (size_t)-16))) { 1323e1051a39Sopenharmony_ci GHASH(ctx, in, i); 1324e1051a39Sopenharmony_ci while (len >= 16) { 1325e1051a39Sopenharmony_ci size_t_aX *out_t = (size_t_aX *)out; 1326e1051a39Sopenharmony_ci const size_t_aX *in_t = (const size_t_aX *)in; 1327e1051a39Sopenharmony_ci 1328e1051a39Sopenharmony_ci (*block) (ctx->Yi.c, ctx->EKi.c, key); 1329e1051a39Sopenharmony_ci ++ctr; 1330e1051a39Sopenharmony_ci if (IS_LITTLE_ENDIAN) 1331e1051a39Sopenharmony_ci# ifdef BSWAP4 1332e1051a39Sopenharmony_ci ctx->Yi.d[3] = BSWAP4(ctr); 1333e1051a39Sopenharmony_ci# else 1334e1051a39Sopenharmony_ci PUTU32(ctx->Yi.c + 12, ctr); 1335e1051a39Sopenharmony_ci# endif 1336e1051a39Sopenharmony_ci else 1337e1051a39Sopenharmony_ci ctx->Yi.d[3] = ctr; 1338e1051a39Sopenharmony_ci for (i = 0; i < 16 / sizeof(size_t); ++i) 1339e1051a39Sopenharmony_ci out_t[i] = in_t[i] ^ ctx->EKi.t[i]; 1340e1051a39Sopenharmony_ci out += 16; 1341e1051a39Sopenharmony_ci in += 16; 1342e1051a39Sopenharmony_ci len -= 16; 1343e1051a39Sopenharmony_ci } 1344e1051a39Sopenharmony_ci } 
1345e1051a39Sopenharmony_ci# else 1346e1051a39Sopenharmony_ci while (len >= 16) { 1347e1051a39Sopenharmony_ci size_t *out_t = (size_t *)out; 1348e1051a39Sopenharmony_ci const size_t *in_t = (const size_t *)in; 1349e1051a39Sopenharmony_ci 1350e1051a39Sopenharmony_ci (*block) (ctx->Yi.c, ctx->EKi.c, key); 1351e1051a39Sopenharmony_ci ++ctr; 1352e1051a39Sopenharmony_ci if (IS_LITTLE_ENDIAN) 1353e1051a39Sopenharmony_ci# ifdef BSWAP4 1354e1051a39Sopenharmony_ci ctx->Yi.d[3] = BSWAP4(ctr); 1355e1051a39Sopenharmony_ci# else 1356e1051a39Sopenharmony_ci PUTU32(ctx->Yi.c + 12, ctr); 1357e1051a39Sopenharmony_ci# endif 1358e1051a39Sopenharmony_ci else 1359e1051a39Sopenharmony_ci ctx->Yi.d[3] = ctr; 1360e1051a39Sopenharmony_ci for (i = 0; i < 16 / sizeof(size_t); ++i) { 1361e1051a39Sopenharmony_ci size_t c = in_t[i]; 1362e1051a39Sopenharmony_ci out_t[i] = c ^ ctx->EKi.t[i]; 1363e1051a39Sopenharmony_ci ctx->Xi.t[i] ^= c; 1364e1051a39Sopenharmony_ci } 1365e1051a39Sopenharmony_ci GCM_MUL(ctx); 1366e1051a39Sopenharmony_ci out += 16; 1367e1051a39Sopenharmony_ci in += 16; 1368e1051a39Sopenharmony_ci len -= 16; 1369e1051a39Sopenharmony_ci } 1370e1051a39Sopenharmony_ci# endif 1371e1051a39Sopenharmony_ci if (len) { 1372e1051a39Sopenharmony_ci (*block) (ctx->Yi.c, ctx->EKi.c, key); 1373e1051a39Sopenharmony_ci ++ctr; 1374e1051a39Sopenharmony_ci if (IS_LITTLE_ENDIAN) 1375e1051a39Sopenharmony_ci# ifdef BSWAP4 1376e1051a39Sopenharmony_ci ctx->Yi.d[3] = BSWAP4(ctr); 1377e1051a39Sopenharmony_ci# else 1378e1051a39Sopenharmony_ci PUTU32(ctx->Yi.c + 12, ctr); 1379e1051a39Sopenharmony_ci# endif 1380e1051a39Sopenharmony_ci else 1381e1051a39Sopenharmony_ci ctx->Yi.d[3] = ctr; 1382e1051a39Sopenharmony_ci# if defined(GHASH) 1383e1051a39Sopenharmony_ci while (len--) { 1384e1051a39Sopenharmony_ci out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n]; 1385e1051a39Sopenharmony_ci ++n; 1386e1051a39Sopenharmony_ci } 1387e1051a39Sopenharmony_ci# else 1388e1051a39Sopenharmony_ci while (len--) { 
1389e1051a39Sopenharmony_ci u8 c = in[n]; 1390e1051a39Sopenharmony_ci ctx->Xi.c[n] ^= c; 1391e1051a39Sopenharmony_ci out[n] = c ^ ctx->EKi.c[n]; 1392e1051a39Sopenharmony_ci ++n; 1393e1051a39Sopenharmony_ci } 1394e1051a39Sopenharmony_ci mres = n; 1395e1051a39Sopenharmony_ci# endif 1396e1051a39Sopenharmony_ci } 1397e1051a39Sopenharmony_ci 1398e1051a39Sopenharmony_ci ctx->mres = mres; 1399e1051a39Sopenharmony_ci return 0; 1400e1051a39Sopenharmony_ci } while (0); 1401e1051a39Sopenharmony_ci } 1402e1051a39Sopenharmony_ci#endif 1403e1051a39Sopenharmony_ci for (i = 0; i < len; ++i) { 1404e1051a39Sopenharmony_ci u8 c; 1405e1051a39Sopenharmony_ci if (n == 0) { 1406e1051a39Sopenharmony_ci (*block) (ctx->Yi.c, ctx->EKi.c, key); 1407e1051a39Sopenharmony_ci ++ctr; 1408e1051a39Sopenharmony_ci if (IS_LITTLE_ENDIAN) 1409e1051a39Sopenharmony_ci#ifdef BSWAP4 1410e1051a39Sopenharmony_ci ctx->Yi.d[3] = BSWAP4(ctr); 1411e1051a39Sopenharmony_ci#else 1412e1051a39Sopenharmony_ci PUTU32(ctx->Yi.c + 12, ctr); 1413e1051a39Sopenharmony_ci#endif 1414e1051a39Sopenharmony_ci else 1415e1051a39Sopenharmony_ci ctx->Yi.d[3] = ctr; 1416e1051a39Sopenharmony_ci } 1417e1051a39Sopenharmony_ci#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 1418e1051a39Sopenharmony_ci out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n]; 1419e1051a39Sopenharmony_ci n = (n + 1) % 16; 1420e1051a39Sopenharmony_ci if (mres == sizeof(ctx->Xn)) { 1421e1051a39Sopenharmony_ci GHASH(ctx,ctx->Xn,sizeof(ctx->Xn)); 1422e1051a39Sopenharmony_ci mres = 0; 1423e1051a39Sopenharmony_ci } 1424e1051a39Sopenharmony_ci#else 1425e1051a39Sopenharmony_ci c = in[i]; 1426e1051a39Sopenharmony_ci out[i] = c ^ ctx->EKi.c[n]; 1427e1051a39Sopenharmony_ci ctx->Xi.c[n] ^= c; 1428e1051a39Sopenharmony_ci mres = n = (n + 1) % 16; 1429e1051a39Sopenharmony_ci if (n == 0) 1430e1051a39Sopenharmony_ci GCM_MUL(ctx); 1431e1051a39Sopenharmony_ci#endif 1432e1051a39Sopenharmony_ci } 1433e1051a39Sopenharmony_ci 1434e1051a39Sopenharmony_ci ctx->mres = mres; 
1435e1051a39Sopenharmony_ci return 0; 1436e1051a39Sopenharmony_ci} 1437e1051a39Sopenharmony_ci 1438e1051a39Sopenharmony_ciint CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, 1439e1051a39Sopenharmony_ci const unsigned char *in, unsigned char *out, 1440e1051a39Sopenharmony_ci size_t len, ctr128_f stream) 1441e1051a39Sopenharmony_ci{ 1442e1051a39Sopenharmony_ci#if defined(OPENSSL_SMALL_FOOTPRINT) 1443e1051a39Sopenharmony_ci return CRYPTO_gcm128_encrypt(ctx, in, out, len); 1444e1051a39Sopenharmony_ci#else 1445e1051a39Sopenharmony_ci DECLARE_IS_ENDIAN; 1446e1051a39Sopenharmony_ci unsigned int n, ctr, mres; 1447e1051a39Sopenharmony_ci size_t i; 1448e1051a39Sopenharmony_ci u64 mlen = ctx->len.u[1]; 1449e1051a39Sopenharmony_ci void *key = ctx->key; 1450e1051a39Sopenharmony_ci# ifdef GCM_FUNCREF_4BIT 1451e1051a39Sopenharmony_ci void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; 1452e1051a39Sopenharmony_ci# ifdef GHASH 1453e1051a39Sopenharmony_ci void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], 1454e1051a39Sopenharmony_ci const u8 *inp, size_t len) = ctx->ghash; 1455e1051a39Sopenharmony_ci# endif 1456e1051a39Sopenharmony_ci# endif 1457e1051a39Sopenharmony_ci 1458e1051a39Sopenharmony_ci mlen += len; 1459e1051a39Sopenharmony_ci if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) 1460e1051a39Sopenharmony_ci return -1; 1461e1051a39Sopenharmony_ci ctx->len.u[1] = mlen; 1462e1051a39Sopenharmony_ci 1463e1051a39Sopenharmony_ci mres = ctx->mres; 1464e1051a39Sopenharmony_ci 1465e1051a39Sopenharmony_ci if (ctx->ares) { 1466e1051a39Sopenharmony_ci /* First call to encrypt finalizes GHASH(AAD) */ 1467e1051a39Sopenharmony_ci#if defined(GHASH) 1468e1051a39Sopenharmony_ci if (len == 0) { 1469e1051a39Sopenharmony_ci GCM_MUL(ctx); 1470e1051a39Sopenharmony_ci ctx->ares = 0; 1471e1051a39Sopenharmony_ci return 0; 1472e1051a39Sopenharmony_ci } 1473e1051a39Sopenharmony_ci memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi)); 1474e1051a39Sopenharmony_ci 
ctx->Xi.u[0] = 0; 1475e1051a39Sopenharmony_ci ctx->Xi.u[1] = 0; 1476e1051a39Sopenharmony_ci mres = sizeof(ctx->Xi); 1477e1051a39Sopenharmony_ci#else 1478e1051a39Sopenharmony_ci GCM_MUL(ctx); 1479e1051a39Sopenharmony_ci#endif 1480e1051a39Sopenharmony_ci ctx->ares = 0; 1481e1051a39Sopenharmony_ci } 1482e1051a39Sopenharmony_ci 1483e1051a39Sopenharmony_ci if (IS_LITTLE_ENDIAN) 1484e1051a39Sopenharmony_ci# ifdef BSWAP4 1485e1051a39Sopenharmony_ci ctr = BSWAP4(ctx->Yi.d[3]); 1486e1051a39Sopenharmony_ci# else 1487e1051a39Sopenharmony_ci ctr = GETU32(ctx->Yi.c + 12); 1488e1051a39Sopenharmony_ci# endif 1489e1051a39Sopenharmony_ci else 1490e1051a39Sopenharmony_ci ctr = ctx->Yi.d[3]; 1491e1051a39Sopenharmony_ci 1492e1051a39Sopenharmony_ci n = mres % 16; 1493e1051a39Sopenharmony_ci if (n) { 1494e1051a39Sopenharmony_ci# if defined(GHASH) 1495e1051a39Sopenharmony_ci while (n && len) { 1496e1051a39Sopenharmony_ci ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n]; 1497e1051a39Sopenharmony_ci --len; 1498e1051a39Sopenharmony_ci n = (n + 1) % 16; 1499e1051a39Sopenharmony_ci } 1500e1051a39Sopenharmony_ci if (n == 0) { 1501e1051a39Sopenharmony_ci GHASH(ctx, ctx->Xn, mres); 1502e1051a39Sopenharmony_ci mres = 0; 1503e1051a39Sopenharmony_ci } else { 1504e1051a39Sopenharmony_ci ctx->mres = mres; 1505e1051a39Sopenharmony_ci return 0; 1506e1051a39Sopenharmony_ci } 1507e1051a39Sopenharmony_ci# else 1508e1051a39Sopenharmony_ci while (n && len) { 1509e1051a39Sopenharmony_ci ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n]; 1510e1051a39Sopenharmony_ci --len; 1511e1051a39Sopenharmony_ci n = (n + 1) % 16; 1512e1051a39Sopenharmony_ci } 1513e1051a39Sopenharmony_ci if (n == 0) { 1514e1051a39Sopenharmony_ci GCM_MUL(ctx); 1515e1051a39Sopenharmony_ci mres = 0; 1516e1051a39Sopenharmony_ci } else { 1517e1051a39Sopenharmony_ci ctx->mres = n; 1518e1051a39Sopenharmony_ci return 0; 1519e1051a39Sopenharmony_ci } 1520e1051a39Sopenharmony_ci# endif 1521e1051a39Sopenharmony_ci } 1522e1051a39Sopenharmony_ci# 
if defined(GHASH) 1523e1051a39Sopenharmony_ci if (len >= 16 && mres) { 1524e1051a39Sopenharmony_ci GHASH(ctx, ctx->Xn, mres); 1525e1051a39Sopenharmony_ci mres = 0; 1526e1051a39Sopenharmony_ci } 1527e1051a39Sopenharmony_ci# if defined(GHASH_CHUNK) 1528e1051a39Sopenharmony_ci while (len >= GHASH_CHUNK) { 1529e1051a39Sopenharmony_ci (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c); 1530e1051a39Sopenharmony_ci ctr += GHASH_CHUNK / 16; 1531e1051a39Sopenharmony_ci if (IS_LITTLE_ENDIAN) 1532e1051a39Sopenharmony_ci# ifdef BSWAP4 1533e1051a39Sopenharmony_ci ctx->Yi.d[3] = BSWAP4(ctr); 1534e1051a39Sopenharmony_ci# else 1535e1051a39Sopenharmony_ci PUTU32(ctx->Yi.c + 12, ctr); 1536e1051a39Sopenharmony_ci# endif 1537e1051a39Sopenharmony_ci else 1538e1051a39Sopenharmony_ci ctx->Yi.d[3] = ctr; 1539e1051a39Sopenharmony_ci GHASH(ctx, out, GHASH_CHUNK); 1540e1051a39Sopenharmony_ci out += GHASH_CHUNK; 1541e1051a39Sopenharmony_ci in += GHASH_CHUNK; 1542e1051a39Sopenharmony_ci len -= GHASH_CHUNK; 1543e1051a39Sopenharmony_ci } 1544e1051a39Sopenharmony_ci# endif 1545e1051a39Sopenharmony_ci# endif 1546e1051a39Sopenharmony_ci if ((i = (len & (size_t)-16))) { 1547e1051a39Sopenharmony_ci size_t j = i / 16; 1548e1051a39Sopenharmony_ci 1549e1051a39Sopenharmony_ci (*stream) (in, out, j, key, ctx->Yi.c); 1550e1051a39Sopenharmony_ci ctr += (unsigned int)j; 1551e1051a39Sopenharmony_ci if (IS_LITTLE_ENDIAN) 1552e1051a39Sopenharmony_ci# ifdef BSWAP4 1553e1051a39Sopenharmony_ci ctx->Yi.d[3] = BSWAP4(ctr); 1554e1051a39Sopenharmony_ci# else 1555e1051a39Sopenharmony_ci PUTU32(ctx->Yi.c + 12, ctr); 1556e1051a39Sopenharmony_ci# endif 1557e1051a39Sopenharmony_ci else 1558e1051a39Sopenharmony_ci ctx->Yi.d[3] = ctr; 1559e1051a39Sopenharmony_ci in += i; 1560e1051a39Sopenharmony_ci len -= i; 1561e1051a39Sopenharmony_ci# if defined(GHASH) 1562e1051a39Sopenharmony_ci GHASH(ctx, out, i); 1563e1051a39Sopenharmony_ci out += i; 1564e1051a39Sopenharmony_ci# else 1565e1051a39Sopenharmony_ci while (j--) { 
1566e1051a39Sopenharmony_ci for (i = 0; i < 16; ++i) 1567e1051a39Sopenharmony_ci ctx->Xi.c[i] ^= out[i]; 1568e1051a39Sopenharmony_ci GCM_MUL(ctx); 1569e1051a39Sopenharmony_ci out += 16; 1570e1051a39Sopenharmony_ci } 1571e1051a39Sopenharmony_ci# endif 1572e1051a39Sopenharmony_ci } 1573e1051a39Sopenharmony_ci if (len) { 1574e1051a39Sopenharmony_ci (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key); 1575e1051a39Sopenharmony_ci ++ctr; 1576e1051a39Sopenharmony_ci if (IS_LITTLE_ENDIAN) 1577e1051a39Sopenharmony_ci# ifdef BSWAP4 1578e1051a39Sopenharmony_ci ctx->Yi.d[3] = BSWAP4(ctr); 1579e1051a39Sopenharmony_ci# else 1580e1051a39Sopenharmony_ci PUTU32(ctx->Yi.c + 12, ctr); 1581e1051a39Sopenharmony_ci# endif 1582e1051a39Sopenharmony_ci else 1583e1051a39Sopenharmony_ci ctx->Yi.d[3] = ctr; 1584e1051a39Sopenharmony_ci while (len--) { 1585e1051a39Sopenharmony_ci# if defined(GHASH) 1586e1051a39Sopenharmony_ci ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n]; 1587e1051a39Sopenharmony_ci# else 1588e1051a39Sopenharmony_ci ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n]; 1589e1051a39Sopenharmony_ci# endif 1590e1051a39Sopenharmony_ci ++n; 1591e1051a39Sopenharmony_ci } 1592e1051a39Sopenharmony_ci } 1593e1051a39Sopenharmony_ci 1594e1051a39Sopenharmony_ci ctx->mres = mres; 1595e1051a39Sopenharmony_ci return 0; 1596e1051a39Sopenharmony_ci#endif 1597e1051a39Sopenharmony_ci} 1598e1051a39Sopenharmony_ci 1599e1051a39Sopenharmony_ciint CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, 1600e1051a39Sopenharmony_ci const unsigned char *in, unsigned char *out, 1601e1051a39Sopenharmony_ci size_t len, ctr128_f stream) 1602e1051a39Sopenharmony_ci{ 1603e1051a39Sopenharmony_ci#if defined(OPENSSL_SMALL_FOOTPRINT) 1604e1051a39Sopenharmony_ci return CRYPTO_gcm128_decrypt(ctx, in, out, len); 1605e1051a39Sopenharmony_ci#else 1606e1051a39Sopenharmony_ci DECLARE_IS_ENDIAN; 1607e1051a39Sopenharmony_ci unsigned int n, ctr, mres; 1608e1051a39Sopenharmony_ci size_t i; 1609e1051a39Sopenharmony_ci u64 mlen = 
ctx->len.u[1]; 1610e1051a39Sopenharmony_ci void *key = ctx->key; 1611e1051a39Sopenharmony_ci# ifdef GCM_FUNCREF_4BIT 1612e1051a39Sopenharmony_ci void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; 1613e1051a39Sopenharmony_ci# ifdef GHASH 1614e1051a39Sopenharmony_ci void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], 1615e1051a39Sopenharmony_ci const u8 *inp, size_t len) = ctx->ghash; 1616e1051a39Sopenharmony_ci# endif 1617e1051a39Sopenharmony_ci# endif 1618e1051a39Sopenharmony_ci 1619e1051a39Sopenharmony_ci mlen += len; 1620e1051a39Sopenharmony_ci if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len)) 1621e1051a39Sopenharmony_ci return -1; 1622e1051a39Sopenharmony_ci ctx->len.u[1] = mlen; 1623e1051a39Sopenharmony_ci 1624e1051a39Sopenharmony_ci mres = ctx->mres; 1625e1051a39Sopenharmony_ci 1626e1051a39Sopenharmony_ci if (ctx->ares) { 1627e1051a39Sopenharmony_ci /* First call to decrypt finalizes GHASH(AAD) */ 1628e1051a39Sopenharmony_ci# if defined(GHASH) 1629e1051a39Sopenharmony_ci if (len == 0) { 1630e1051a39Sopenharmony_ci GCM_MUL(ctx); 1631e1051a39Sopenharmony_ci ctx->ares = 0; 1632e1051a39Sopenharmony_ci return 0; 1633e1051a39Sopenharmony_ci } 1634e1051a39Sopenharmony_ci memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi)); 1635e1051a39Sopenharmony_ci ctx->Xi.u[0] = 0; 1636e1051a39Sopenharmony_ci ctx->Xi.u[1] = 0; 1637e1051a39Sopenharmony_ci mres = sizeof(ctx->Xi); 1638e1051a39Sopenharmony_ci# else 1639e1051a39Sopenharmony_ci GCM_MUL(ctx); 1640e1051a39Sopenharmony_ci# endif 1641e1051a39Sopenharmony_ci ctx->ares = 0; 1642e1051a39Sopenharmony_ci } 1643e1051a39Sopenharmony_ci 1644e1051a39Sopenharmony_ci if (IS_LITTLE_ENDIAN) 1645e1051a39Sopenharmony_ci# ifdef BSWAP4 1646e1051a39Sopenharmony_ci ctr = BSWAP4(ctx->Yi.d[3]); 1647e1051a39Sopenharmony_ci# else 1648e1051a39Sopenharmony_ci ctr = GETU32(ctx->Yi.c + 12); 1649e1051a39Sopenharmony_ci# endif 1650e1051a39Sopenharmony_ci else 1651e1051a39Sopenharmony_ci ctr = ctx->Yi.d[3]; 
1652e1051a39Sopenharmony_ci 1653e1051a39Sopenharmony_ci n = mres % 16; 1654e1051a39Sopenharmony_ci if (n) { 1655e1051a39Sopenharmony_ci# if defined(GHASH) 1656e1051a39Sopenharmony_ci while (n && len) { 1657e1051a39Sopenharmony_ci *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n]; 1658e1051a39Sopenharmony_ci --len; 1659e1051a39Sopenharmony_ci n = (n + 1) % 16; 1660e1051a39Sopenharmony_ci } 1661e1051a39Sopenharmony_ci if (n == 0) { 1662e1051a39Sopenharmony_ci GHASH(ctx, ctx->Xn, mres); 1663e1051a39Sopenharmony_ci mres = 0; 1664e1051a39Sopenharmony_ci } else { 1665e1051a39Sopenharmony_ci ctx->mres = mres; 1666e1051a39Sopenharmony_ci return 0; 1667e1051a39Sopenharmony_ci } 1668e1051a39Sopenharmony_ci# else 1669e1051a39Sopenharmony_ci while (n && len) { 1670e1051a39Sopenharmony_ci u8 c = *(in++); 1671e1051a39Sopenharmony_ci *(out++) = c ^ ctx->EKi.c[n]; 1672e1051a39Sopenharmony_ci ctx->Xi.c[n] ^= c; 1673e1051a39Sopenharmony_ci --len; 1674e1051a39Sopenharmony_ci n = (n + 1) % 16; 1675e1051a39Sopenharmony_ci } 1676e1051a39Sopenharmony_ci if (n == 0) { 1677e1051a39Sopenharmony_ci GCM_MUL(ctx); 1678e1051a39Sopenharmony_ci mres = 0; 1679e1051a39Sopenharmony_ci } else { 1680e1051a39Sopenharmony_ci ctx->mres = n; 1681e1051a39Sopenharmony_ci return 0; 1682e1051a39Sopenharmony_ci } 1683e1051a39Sopenharmony_ci# endif 1684e1051a39Sopenharmony_ci } 1685e1051a39Sopenharmony_ci# if defined(GHASH) 1686e1051a39Sopenharmony_ci if (len >= 16 && mres) { 1687e1051a39Sopenharmony_ci GHASH(ctx, ctx->Xn, mres); 1688e1051a39Sopenharmony_ci mres = 0; 1689e1051a39Sopenharmony_ci } 1690e1051a39Sopenharmony_ci# if defined(GHASH_CHUNK) 1691e1051a39Sopenharmony_ci while (len >= GHASH_CHUNK) { 1692e1051a39Sopenharmony_ci GHASH(ctx, in, GHASH_CHUNK); 1693e1051a39Sopenharmony_ci (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c); 1694e1051a39Sopenharmony_ci ctr += GHASH_CHUNK / 16; 1695e1051a39Sopenharmony_ci if (IS_LITTLE_ENDIAN) 1696e1051a39Sopenharmony_ci# ifdef BSWAP4 
1697e1051a39Sopenharmony_ci ctx->Yi.d[3] = BSWAP4(ctr); 1698e1051a39Sopenharmony_ci# else 1699e1051a39Sopenharmony_ci PUTU32(ctx->Yi.c + 12, ctr); 1700e1051a39Sopenharmony_ci# endif 1701e1051a39Sopenharmony_ci else 1702e1051a39Sopenharmony_ci ctx->Yi.d[3] = ctr; 1703e1051a39Sopenharmony_ci out += GHASH_CHUNK; 1704e1051a39Sopenharmony_ci in += GHASH_CHUNK; 1705e1051a39Sopenharmony_ci len -= GHASH_CHUNK; 1706e1051a39Sopenharmony_ci } 1707e1051a39Sopenharmony_ci# endif 1708e1051a39Sopenharmony_ci# endif 1709e1051a39Sopenharmony_ci if ((i = (len & (size_t)-16))) { 1710e1051a39Sopenharmony_ci size_t j = i / 16; 1711e1051a39Sopenharmony_ci 1712e1051a39Sopenharmony_ci# if defined(GHASH) 1713e1051a39Sopenharmony_ci GHASH(ctx, in, i); 1714e1051a39Sopenharmony_ci# else 1715e1051a39Sopenharmony_ci while (j--) { 1716e1051a39Sopenharmony_ci size_t k; 1717e1051a39Sopenharmony_ci for (k = 0; k < 16; ++k) 1718e1051a39Sopenharmony_ci ctx->Xi.c[k] ^= in[k]; 1719e1051a39Sopenharmony_ci GCM_MUL(ctx); 1720e1051a39Sopenharmony_ci in += 16; 1721e1051a39Sopenharmony_ci } 1722e1051a39Sopenharmony_ci j = i / 16; 1723e1051a39Sopenharmony_ci in -= i; 1724e1051a39Sopenharmony_ci# endif 1725e1051a39Sopenharmony_ci (*stream) (in, out, j, key, ctx->Yi.c); 1726e1051a39Sopenharmony_ci ctr += (unsigned int)j; 1727e1051a39Sopenharmony_ci if (IS_LITTLE_ENDIAN) 1728e1051a39Sopenharmony_ci# ifdef BSWAP4 1729e1051a39Sopenharmony_ci ctx->Yi.d[3] = BSWAP4(ctr); 1730e1051a39Sopenharmony_ci# else 1731e1051a39Sopenharmony_ci PUTU32(ctx->Yi.c + 12, ctr); 1732e1051a39Sopenharmony_ci# endif 1733e1051a39Sopenharmony_ci else 1734e1051a39Sopenharmony_ci ctx->Yi.d[3] = ctr; 1735e1051a39Sopenharmony_ci out += i; 1736e1051a39Sopenharmony_ci in += i; 1737e1051a39Sopenharmony_ci len -= i; 1738e1051a39Sopenharmony_ci } 1739e1051a39Sopenharmony_ci if (len) { 1740e1051a39Sopenharmony_ci (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key); 1741e1051a39Sopenharmony_ci ++ctr; 1742e1051a39Sopenharmony_ci if (IS_LITTLE_ENDIAN) 
1743e1051a39Sopenharmony_ci# ifdef BSWAP4 1744e1051a39Sopenharmony_ci ctx->Yi.d[3] = BSWAP4(ctr); 1745e1051a39Sopenharmony_ci# else 1746e1051a39Sopenharmony_ci PUTU32(ctx->Yi.c + 12, ctr); 1747e1051a39Sopenharmony_ci# endif 1748e1051a39Sopenharmony_ci else 1749e1051a39Sopenharmony_ci ctx->Yi.d[3] = ctr; 1750e1051a39Sopenharmony_ci while (len--) { 1751e1051a39Sopenharmony_ci# if defined(GHASH) 1752e1051a39Sopenharmony_ci out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n]; 1753e1051a39Sopenharmony_ci# else 1754e1051a39Sopenharmony_ci u8 c = in[n]; 1755e1051a39Sopenharmony_ci ctx->Xi.c[mres++] ^= c; 1756e1051a39Sopenharmony_ci out[n] = c ^ ctx->EKi.c[n]; 1757e1051a39Sopenharmony_ci# endif 1758e1051a39Sopenharmony_ci ++n; 1759e1051a39Sopenharmony_ci } 1760e1051a39Sopenharmony_ci } 1761e1051a39Sopenharmony_ci 1762e1051a39Sopenharmony_ci ctx->mres = mres; 1763e1051a39Sopenharmony_ci return 0; 1764e1051a39Sopenharmony_ci#endif 1765e1051a39Sopenharmony_ci} 1766e1051a39Sopenharmony_ci 1767e1051a39Sopenharmony_ciint CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag, 1768e1051a39Sopenharmony_ci size_t len) 1769e1051a39Sopenharmony_ci{ 1770e1051a39Sopenharmony_ci DECLARE_IS_ENDIAN; 1771e1051a39Sopenharmony_ci u64 alen = ctx->len.u[0] << 3; 1772e1051a39Sopenharmony_ci u64 clen = ctx->len.u[1] << 3; 1773e1051a39Sopenharmony_ci#ifdef GCM_FUNCREF_4BIT 1774e1051a39Sopenharmony_ci void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult; 1775e1051a39Sopenharmony_ci# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 1776e1051a39Sopenharmony_ci void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16], 1777e1051a39Sopenharmony_ci const u8 *inp, size_t len) = ctx->ghash; 1778e1051a39Sopenharmony_ci# endif 1779e1051a39Sopenharmony_ci#endif 1780e1051a39Sopenharmony_ci 1781e1051a39Sopenharmony_ci#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 1782e1051a39Sopenharmony_ci u128 bitlen; 1783e1051a39Sopenharmony_ci unsigned int mres = ctx->mres; 
1784e1051a39Sopenharmony_ci 1785e1051a39Sopenharmony_ci if (mres) { 1786e1051a39Sopenharmony_ci unsigned blocks = (mres + 15) & -16; 1787e1051a39Sopenharmony_ci 1788e1051a39Sopenharmony_ci memset(ctx->Xn + mres, 0, blocks - mres); 1789e1051a39Sopenharmony_ci mres = blocks; 1790e1051a39Sopenharmony_ci if (mres == sizeof(ctx->Xn)) { 1791e1051a39Sopenharmony_ci GHASH(ctx, ctx->Xn, mres); 1792e1051a39Sopenharmony_ci mres = 0; 1793e1051a39Sopenharmony_ci } 1794e1051a39Sopenharmony_ci } else if (ctx->ares) { 1795e1051a39Sopenharmony_ci GCM_MUL(ctx); 1796e1051a39Sopenharmony_ci } 1797e1051a39Sopenharmony_ci#else 1798e1051a39Sopenharmony_ci if (ctx->mres || ctx->ares) 1799e1051a39Sopenharmony_ci GCM_MUL(ctx); 1800e1051a39Sopenharmony_ci#endif 1801e1051a39Sopenharmony_ci 1802e1051a39Sopenharmony_ci if (IS_LITTLE_ENDIAN) { 1803e1051a39Sopenharmony_ci#ifdef BSWAP8 1804e1051a39Sopenharmony_ci alen = BSWAP8(alen); 1805e1051a39Sopenharmony_ci clen = BSWAP8(clen); 1806e1051a39Sopenharmony_ci#else 1807e1051a39Sopenharmony_ci u8 *p = ctx->len.c; 1808e1051a39Sopenharmony_ci 1809e1051a39Sopenharmony_ci ctx->len.u[0] = alen; 1810e1051a39Sopenharmony_ci ctx->len.u[1] = clen; 1811e1051a39Sopenharmony_ci 1812e1051a39Sopenharmony_ci alen = (u64)GETU32(p) << 32 | GETU32(p + 4); 1813e1051a39Sopenharmony_ci clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12); 1814e1051a39Sopenharmony_ci#endif 1815e1051a39Sopenharmony_ci } 1816e1051a39Sopenharmony_ci 1817e1051a39Sopenharmony_ci#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT) 1818e1051a39Sopenharmony_ci bitlen.hi = alen; 1819e1051a39Sopenharmony_ci bitlen.lo = clen; 1820e1051a39Sopenharmony_ci memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen)); 1821e1051a39Sopenharmony_ci mres += sizeof(bitlen); 1822e1051a39Sopenharmony_ci GHASH(ctx, ctx->Xn, mres); 1823e1051a39Sopenharmony_ci#else 1824e1051a39Sopenharmony_ci ctx->Xi.u[0] ^= alen; 1825e1051a39Sopenharmony_ci ctx->Xi.u[1] ^= clen; 1826e1051a39Sopenharmony_ci GCM_MUL(ctx); 
1827e1051a39Sopenharmony_ci#endif 1828e1051a39Sopenharmony_ci 1829e1051a39Sopenharmony_ci ctx->Xi.u[0] ^= ctx->EK0.u[0]; 1830e1051a39Sopenharmony_ci ctx->Xi.u[1] ^= ctx->EK0.u[1]; 1831e1051a39Sopenharmony_ci 1832e1051a39Sopenharmony_ci if (tag && len <= sizeof(ctx->Xi)) 1833e1051a39Sopenharmony_ci return CRYPTO_memcmp(ctx->Xi.c, tag, len); 1834e1051a39Sopenharmony_ci else 1835e1051a39Sopenharmony_ci return -1; 1836e1051a39Sopenharmony_ci} 1837e1051a39Sopenharmony_ci 1838e1051a39Sopenharmony_civoid CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) 1839e1051a39Sopenharmony_ci{ 1840e1051a39Sopenharmony_ci CRYPTO_gcm128_finish(ctx, NULL, 0); 1841e1051a39Sopenharmony_ci memcpy(tag, ctx->Xi.c, 1842e1051a39Sopenharmony_ci len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c)); 1843e1051a39Sopenharmony_ci} 1844e1051a39Sopenharmony_ci 1845e1051a39Sopenharmony_ciGCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block) 1846e1051a39Sopenharmony_ci{ 1847e1051a39Sopenharmony_ci GCM128_CONTEXT *ret; 1848e1051a39Sopenharmony_ci 1849e1051a39Sopenharmony_ci if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL) 1850e1051a39Sopenharmony_ci CRYPTO_gcm128_init(ret, key, block); 1851e1051a39Sopenharmony_ci 1852e1051a39Sopenharmony_ci return ret; 1853e1051a39Sopenharmony_ci} 1854e1051a39Sopenharmony_ci 1855e1051a39Sopenharmony_civoid CRYPTO_gcm128_release(GCM128_CONTEXT *ctx) 1856e1051a39Sopenharmony_ci{ 1857e1051a39Sopenharmony_ci OPENSSL_clear_free(ctx, sizeof(*ctx)); 1858e1051a39Sopenharmony_ci} 1859