18c2ecf20Sopenharmony_ci/* gf128mul.h - GF(2^128) multiplication functions 28c2ecf20Sopenharmony_ci * 38c2ecf20Sopenharmony_ci * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. 48c2ecf20Sopenharmony_ci * Copyright (c) 2006 Rik Snel <rsnel@cube.dyndns.org> 58c2ecf20Sopenharmony_ci * 68c2ecf20Sopenharmony_ci * Based on Dr Brian Gladman's (GPL'd) work published at 78c2ecf20Sopenharmony_ci * http://fp.gladman.plus.com/cryptography_technology/index.htm 88c2ecf20Sopenharmony_ci * See the original copyright notice below. 98c2ecf20Sopenharmony_ci * 108c2ecf20Sopenharmony_ci * This program is free software; you can redistribute it and/or modify it 118c2ecf20Sopenharmony_ci * under the terms of the GNU General Public License as published by the Free 128c2ecf20Sopenharmony_ci * Software Foundation; either version 2 of the License, or (at your option) 138c2ecf20Sopenharmony_ci * any later version. 148c2ecf20Sopenharmony_ci */ 158c2ecf20Sopenharmony_ci/* 168c2ecf20Sopenharmony_ci --------------------------------------------------------------------------- 178c2ecf20Sopenharmony_ci Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. 188c2ecf20Sopenharmony_ci 198c2ecf20Sopenharmony_ci LICENSE TERMS 208c2ecf20Sopenharmony_ci 218c2ecf20Sopenharmony_ci The free distribution and use of this software in both source and binary 228c2ecf20Sopenharmony_ci form is allowed (with or without changes) provided that: 238c2ecf20Sopenharmony_ci 248c2ecf20Sopenharmony_ci 1. distributions of this source code include the above copyright 258c2ecf20Sopenharmony_ci notice, this list of conditions and the following disclaimer; 268c2ecf20Sopenharmony_ci 278c2ecf20Sopenharmony_ci 2. distributions in binary form include the above copyright 288c2ecf20Sopenharmony_ci notice, this list of conditions and the following disclaimer 298c2ecf20Sopenharmony_ci in the documentation and/or other associated materials; 308c2ecf20Sopenharmony_ci 318c2ecf20Sopenharmony_ci 3. the copyright holder's name is not used to endorse products 328c2ecf20Sopenharmony_ci built using this software without specific written permission. 338c2ecf20Sopenharmony_ci 348c2ecf20Sopenharmony_ci ALTERNATIVELY, provided that this notice is retained in full, this product 358c2ecf20Sopenharmony_ci may be distributed under the terms of the GNU General Public License (GPL), 368c2ecf20Sopenharmony_ci in which case the provisions of the GPL apply INSTEAD OF those given above. 378c2ecf20Sopenharmony_ci 388c2ecf20Sopenharmony_ci DISCLAIMER 398c2ecf20Sopenharmony_ci 408c2ecf20Sopenharmony_ci This software is provided 'as is' with no explicit or implied warranties 418c2ecf20Sopenharmony_ci in respect of its properties, including, but not limited to, correctness 428c2ecf20Sopenharmony_ci and/or fitness for purpose. 438c2ecf20Sopenharmony_ci --------------------------------------------------------------------------- 448c2ecf20Sopenharmony_ci Issue Date: 31/01/2006 458c2ecf20Sopenharmony_ci 468c2ecf20Sopenharmony_ci An implementation of field multiplication in Galois Field GF(2^128) 478c2ecf20Sopenharmony_ci*/ 488c2ecf20Sopenharmony_ci 498c2ecf20Sopenharmony_ci#ifndef _CRYPTO_GF128MUL_H 508c2ecf20Sopenharmony_ci#define _CRYPTO_GF128MUL_H 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_ci#include <asm/byteorder.h> 538c2ecf20Sopenharmony_ci#include <crypto/b128ops.h> 548c2ecf20Sopenharmony_ci#include <linux/slab.h> 558c2ecf20Sopenharmony_ci 568c2ecf20Sopenharmony_ci/* Comment by Rik: 578c2ecf20Sopenharmony_ci * 588c2ecf20Sopenharmony_ci * For some background on GF(2^128) see for example: 598c2ecf20Sopenharmony_ci * http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/gcm/gcm-revised-spec.pdf 608c2ecf20Sopenharmony_ci * 618c2ecf20Sopenharmony_ci * The elements of GF(2^128) := GF(2)[X]/(X^128-X^7-X^2-X^1-1) can 628c2ecf20Sopenharmony_ci * be mapped to computer memory in a variety of ways. Let's examine 638c2ecf20Sopenharmony_ci * three common cases. 648c2ecf20Sopenharmony_ci * 658c2ecf20Sopenharmony_ci * Take a look at the 16 binary octets below in memory order. The msb's 668c2ecf20Sopenharmony_ci * are left and the lsb's are right. char b[16] is an array and b[0] is 678c2ecf20Sopenharmony_ci * the first octet. 688c2ecf20Sopenharmony_ci * 698c2ecf20Sopenharmony_ci * 10000000 00000000 00000000 00000000 .... 00000000 00000000 00000000 708c2ecf20Sopenharmony_ci * b[0] b[1] b[2] b[3] b[13] b[14] b[15] 718c2ecf20Sopenharmony_ci * 728c2ecf20Sopenharmony_ci * Every bit is a coefficient of some power of X. We can store the bits 738c2ecf20Sopenharmony_ci * in every byte in little-endian order and the bytes themselves also in 748c2ecf20Sopenharmony_ci * little endian order. I will call this lle (little-little-endian). 758c2ecf20Sopenharmony_ci * The above buffer represents the polynomial 1, and X^7+X^2+X^1+1 looks 768c2ecf20Sopenharmony_ci * like 11100001 00000000 .... 00000000 = { 0xE1, 0x00, }. 778c2ecf20Sopenharmony_ci * This format was originally implemented in gf128mul and is used 788c2ecf20Sopenharmony_ci * in GCM (Galois/Counter mode) and in ABL (Arbitrary Block Length). 798c2ecf20Sopenharmony_ci * 808c2ecf20Sopenharmony_ci * Another convention says: store the bits in bigendian order and the 818c2ecf20Sopenharmony_ci * bytes also. This is bbe (big-big-endian). Now the buffer above 828c2ecf20Sopenharmony_ci * represents X^127. X^7+X^2+X^1+1 looks like 00000000 .... 10000111, 838c2ecf20Sopenharmony_ci * b[15] = 0x87 and the rest is 0. LRW uses this convention and bbe 848c2ecf20Sopenharmony_ci * is partly implemented. 858c2ecf20Sopenharmony_ci * 868c2ecf20Sopenharmony_ci * Both of the above formats are easy to implement on big-endian 878c2ecf20Sopenharmony_ci * machines. 888c2ecf20Sopenharmony_ci * 898c2ecf20Sopenharmony_ci * XTS and EME (the latter of which is patent encumbered) use the ble 908c2ecf20Sopenharmony_ci * format (bits are stored in big endian order and the bytes in little 918c2ecf20Sopenharmony_ci * endian). The above buffer represents X^7 in this case and the 928c2ecf20Sopenharmony_ci * primitive polynomial is b[0] = 0x87. 938c2ecf20Sopenharmony_ci * 948c2ecf20Sopenharmony_ci * The common machine word-size is smaller than 128 bits, so to make 958c2ecf20Sopenharmony_ci * an efficient implementation we must split into machine word sizes. 968c2ecf20Sopenharmony_ci * This implementation uses 64-bit words for the moment. Machine 978c2ecf20Sopenharmony_ci * endianness comes into play. The lle format in relation to machine 988c2ecf20Sopenharmony_ci * endianness is discussed below by the original author of gf128mul Dr 998c2ecf20Sopenharmony_ci * Brian Gladman. 1008c2ecf20Sopenharmony_ci * 1018c2ecf20Sopenharmony_ci * Let's look at the bbe and ble format on a little endian machine. 1028c2ecf20Sopenharmony_ci * 1038c2ecf20Sopenharmony_ci * bbe on a little endian machine u32 x[4]: 1048c2ecf20Sopenharmony_ci * 1058c2ecf20Sopenharmony_ci * MS x[0] LS MS x[1] LS 1068c2ecf20Sopenharmony_ci * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 1078c2ecf20Sopenharmony_ci * 103..96 111.104 119.112 127.120 71...64 79...72 87...80 95...88 1088c2ecf20Sopenharmony_ci * 1098c2ecf20Sopenharmony_ci * MS x[2] LS MS x[3] LS 1108c2ecf20Sopenharmony_ci * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 1118c2ecf20Sopenharmony_ci * 39...32 47...40 55...48 63...56 07...00 15...08 23...16 31...24 1128c2ecf20Sopenharmony_ci * 1138c2ecf20Sopenharmony_ci * ble on a little endian machine 1148c2ecf20Sopenharmony_ci * 1158c2ecf20Sopenharmony_ci * MS x[0] LS MS x[1] LS 1168c2ecf20Sopenharmony_ci * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 1178c2ecf20Sopenharmony_ci * 31...24 23...16 15...08 07...00 63...56 55...48 47...40 39...32 1188c2ecf20Sopenharmony_ci * 1198c2ecf20Sopenharmony_ci * MS x[2] LS MS x[3] LS 1208c2ecf20Sopenharmony_ci * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 1218c2ecf20Sopenharmony_ci * 95...88 87...80 79...72 71...64 127.120 199.112 111.104 103..96 1228c2ecf20Sopenharmony_ci * 1238c2ecf20Sopenharmony_ci * Multiplications in GF(2^128) are mostly bit-shifts, so you see why 1248c2ecf20Sopenharmony_ci * ble (and lbe also) are easier to implement on a little-endian 1258c2ecf20Sopenharmony_ci * machine than on a big-endian machine. The converse holds for bbe 1268c2ecf20Sopenharmony_ci * and lle. 1278c2ecf20Sopenharmony_ci * 1288c2ecf20Sopenharmony_ci * Note: to have good alignment, it seems to me that it is sufficient 1298c2ecf20Sopenharmony_ci * to keep elements of GF(2^128) in type u64[2]. On 32-bit wordsize 1308c2ecf20Sopenharmony_ci * machines this will automatically aligned to wordsize and on a 64-bit 1318c2ecf20Sopenharmony_ci * machine also. 1328c2ecf20Sopenharmony_ci */ 1338c2ecf20Sopenharmony_ci/* Multiply a GF(2^128) field element by x. Field elements are 1348c2ecf20Sopenharmony_ci held in arrays of bytes in which field bits 8n..8n + 7 are held in 1358c2ecf20Sopenharmony_ci byte[n], with lower indexed bits placed in the more numerically 1368c2ecf20Sopenharmony_ci significant bit positions within bytes. 1378c2ecf20Sopenharmony_ci 1388c2ecf20Sopenharmony_ci On little endian machines the bit indexes translate into the bit 1398c2ecf20Sopenharmony_ci positions within four 32-bit words in the following way 1408c2ecf20Sopenharmony_ci 1418c2ecf20Sopenharmony_ci MS x[0] LS MS x[1] LS 1428c2ecf20Sopenharmony_ci ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 1438c2ecf20Sopenharmony_ci 24...31 16...23 08...15 00...07 56...63 48...55 40...47 32...39 1448c2ecf20Sopenharmony_ci 1458c2ecf20Sopenharmony_ci MS x[2] LS MS x[3] LS 1468c2ecf20Sopenharmony_ci ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 1478c2ecf20Sopenharmony_ci 88...95 80...87 72...79 64...71 120.127 112.119 104.111 96..103 1488c2ecf20Sopenharmony_ci 1498c2ecf20Sopenharmony_ci On big endian machines the bit indexes translate into the bit 1508c2ecf20Sopenharmony_ci positions within four 32-bit words in the following way 1518c2ecf20Sopenharmony_ci 1528c2ecf20Sopenharmony_ci MS x[0] LS MS x[1] LS 1538c2ecf20Sopenharmony_ci ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 1548c2ecf20Sopenharmony_ci 00...07 08...15 16...23 24...31 32...39 40...47 48...55 56...63 1558c2ecf20Sopenharmony_ci 1568c2ecf20Sopenharmony_ci MS x[2] LS MS x[3] LS 1578c2ecf20Sopenharmony_ci ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls 1588c2ecf20Sopenharmony_ci 64...71 72...79 80...87 88...95 96..103 104.111 112.119 120.127 1598c2ecf20Sopenharmony_ci*/ 1608c2ecf20Sopenharmony_ci 1618c2ecf20Sopenharmony_ci/* A slow generic version of gf_mul, implemented for lle and bbe 1628c2ecf20Sopenharmony_ci * It multiplies a and b and puts the result in a */ 1638c2ecf20Sopenharmony_civoid gf128mul_lle(be128 *a, const be128 *b); 1648c2ecf20Sopenharmony_ci 1658c2ecf20Sopenharmony_civoid gf128mul_bbe(be128 *a, const be128 *b); 1668c2ecf20Sopenharmony_ci 1678c2ecf20Sopenharmony_ci/* 1688c2ecf20Sopenharmony_ci * The following functions multiply a field element by x in 1698c2ecf20Sopenharmony_ci * the polynomial field representation. They use 64-bit word operations 1708c2ecf20Sopenharmony_ci * to gain speed but compensate for machine endianness and hence work 1718c2ecf20Sopenharmony_ci * correctly on both styles of machine. 1728c2ecf20Sopenharmony_ci * 1738c2ecf20Sopenharmony_ci * They are defined here for performance. 1748c2ecf20Sopenharmony_ci */ 1758c2ecf20Sopenharmony_ci 1768c2ecf20Sopenharmony_cistatic inline u64 gf128mul_mask_from_bit(u64 x, int which) 1778c2ecf20Sopenharmony_ci{ 1788c2ecf20Sopenharmony_ci /* a constant-time version of 'x & ((u64)1 << which) ? (u64)-1 : 0' */ 1798c2ecf20Sopenharmony_ci return ((s64)(x << (63 - which)) >> 63); 1808c2ecf20Sopenharmony_ci} 1818c2ecf20Sopenharmony_ci 1828c2ecf20Sopenharmony_cistatic inline void gf128mul_x_lle(be128 *r, const be128 *x) 1838c2ecf20Sopenharmony_ci{ 1848c2ecf20Sopenharmony_ci u64 a = be64_to_cpu(x->a); 1858c2ecf20Sopenharmony_ci u64 b = be64_to_cpu(x->b); 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_ci /* equivalent to gf128mul_table_le[(b << 7) & 0xff] << 48 1888c2ecf20Sopenharmony_ci * (see crypto/gf128mul.c): */ 1898c2ecf20Sopenharmony_ci u64 _tt = gf128mul_mask_from_bit(b, 0) & ((u64)0xe1 << 56); 1908c2ecf20Sopenharmony_ci 1918c2ecf20Sopenharmony_ci r->b = cpu_to_be64((b >> 1) | (a << 63)); 1928c2ecf20Sopenharmony_ci r->a = cpu_to_be64((a >> 1) ^ _tt); 1938c2ecf20Sopenharmony_ci} 1948c2ecf20Sopenharmony_ci 1958c2ecf20Sopenharmony_cistatic inline void gf128mul_x_bbe(be128 *r, const be128 *x) 1968c2ecf20Sopenharmony_ci{ 1978c2ecf20Sopenharmony_ci u64 a = be64_to_cpu(x->a); 1988c2ecf20Sopenharmony_ci u64 b = be64_to_cpu(x->b); 1998c2ecf20Sopenharmony_ci 2008c2ecf20Sopenharmony_ci /* equivalent to gf128mul_table_be[a >> 63] (see crypto/gf128mul.c): */ 2018c2ecf20Sopenharmony_ci u64 _tt = gf128mul_mask_from_bit(a, 63) & 0x87; 2028c2ecf20Sopenharmony_ci 2038c2ecf20Sopenharmony_ci r->a = cpu_to_be64((a << 1) | (b >> 63)); 2048c2ecf20Sopenharmony_ci r->b = cpu_to_be64((b << 1) ^ _tt); 2058c2ecf20Sopenharmony_ci} 2068c2ecf20Sopenharmony_ci 2078c2ecf20Sopenharmony_ci/* needed by XTS */ 2088c2ecf20Sopenharmony_cistatic inline void gf128mul_x_ble(le128 *r, const le128 *x) 2098c2ecf20Sopenharmony_ci{ 2108c2ecf20Sopenharmony_ci u64 a = le64_to_cpu(x->a); 2118c2ecf20Sopenharmony_ci u64 b = le64_to_cpu(x->b); 2128c2ecf20Sopenharmony_ci 2138c2ecf20Sopenharmony_ci /* equivalent to gf128mul_table_be[b >> 63] (see crypto/gf128mul.c): */ 2148c2ecf20Sopenharmony_ci u64 _tt = gf128mul_mask_from_bit(a, 63) & 0x87; 2158c2ecf20Sopenharmony_ci 2168c2ecf20Sopenharmony_ci r->a = cpu_to_le64((a << 1) | (b >> 63)); 2178c2ecf20Sopenharmony_ci r->b = cpu_to_le64((b << 1) ^ _tt); 2188c2ecf20Sopenharmony_ci} 2198c2ecf20Sopenharmony_ci 2208c2ecf20Sopenharmony_ci/* 4k table optimization */ 2218c2ecf20Sopenharmony_ci 2228c2ecf20Sopenharmony_cistruct gf128mul_4k { 2238c2ecf20Sopenharmony_ci be128 t[256]; 2248c2ecf20Sopenharmony_ci}; 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_cistruct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g); 2278c2ecf20Sopenharmony_cistruct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g); 2288c2ecf20Sopenharmony_civoid gf128mul_4k_lle(be128 *a, const struct gf128mul_4k *t); 2298c2ecf20Sopenharmony_civoid gf128mul_4k_bbe(be128 *a, const struct gf128mul_4k *t); 2308c2ecf20Sopenharmony_civoid gf128mul_x8_ble(le128 *r, const le128 *x); 2318c2ecf20Sopenharmony_cistatic inline void gf128mul_free_4k(struct gf128mul_4k *t) 2328c2ecf20Sopenharmony_ci{ 2338c2ecf20Sopenharmony_ci kfree_sensitive(t); 2348c2ecf20Sopenharmony_ci} 2358c2ecf20Sopenharmony_ci 2368c2ecf20Sopenharmony_ci 2378c2ecf20Sopenharmony_ci/* 64k table optimization, implemented for bbe */ 2388c2ecf20Sopenharmony_ci 2398c2ecf20Sopenharmony_cistruct gf128mul_64k { 2408c2ecf20Sopenharmony_ci struct gf128mul_4k *t[16]; 2418c2ecf20Sopenharmony_ci}; 2428c2ecf20Sopenharmony_ci 2438c2ecf20Sopenharmony_ci/* First initialize with the constant factor with which you 2448c2ecf20Sopenharmony_ci * want to multiply and then call gf128mul_64k_bbe with the other 2458c2ecf20Sopenharmony_ci * factor in the first argument, and the table in the second. 2468c2ecf20Sopenharmony_ci * Afterwards, the result is stored in *a. 2478c2ecf20Sopenharmony_ci */ 2488c2ecf20Sopenharmony_cistruct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g); 2498c2ecf20Sopenharmony_civoid gf128mul_free_64k(struct gf128mul_64k *t); 2508c2ecf20Sopenharmony_civoid gf128mul_64k_bbe(be128 *a, const struct gf128mul_64k *t); 2518c2ecf20Sopenharmony_ci 2528c2ecf20Sopenharmony_ci#endif /* _CRYPTO_GF128MUL_H */ 253