// SPDX-License-Identifier: GPL-2.0
/*
 * SHA1 routine optimized to do word accesses rather than byte accesses,
 * and to avoid unnecessary copies into the context array.
 *
 * This was based on the git SHA1 implementation.
 */

#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/bitops.h>
#include <linux/string.h>
#include <crypto/sha.h>
#include <asm/unaligned.h>

/*
 * If you have 32 registers or more, the compiler can (and should)
 * try to change the array[] accesses into registers. However, on
 * machines with fewer than ~25 registers, that won't really work,
 * and at least gcc will make an unholy mess of it.
 *
 * So to avoid that mess, which just slows things down, we force
 * the stores to memory to actually happen (we might be better off
 * with a 'W(t)=(val);asm("":"+m" (W(t)))' there instead, as
 * suggested by Artur Skawina - that will also make gcc unable to
 * try to do the silly "optimize away loads" part because it won't
 * see what the value will be).
 *
 * Ben Herrenschmidt reports that on PPC, the C version comes close
 * to the optimized asm with this (i.e. on PPC you don't want that
 * 'volatile', since there are lots of registers).
 *
 * On ARM we get the best code generation by forcing a full memory barrier
 * between each SHA_ROUND, otherwise gcc happily gets wild with spilling,
 * the stack frame size simply explodes and performance goes down the drain.
 */

#ifdef CONFIG_X86
  #define setW(x, val) (*(volatile __u32 *)&W(x) = (val))
#elif defined(CONFIG_ARM)
  #define setW(x, val) do { W(x) = (val); __asm__("":::"memory"); } while (0)
#else
  #define setW(x, val) (W(x) = (val))
#endif

/* This "rolls" over the 512-bit array */
#define W(x) (array[(x)&15])

/*
 * Where do we get the source from? The first 16 iterations get it from
 * the input data, the remaining 64 mix it from the 512-bit array.
 */
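/*
 * For rounds 16..79 the SHA-1 message schedule is
 *
 *	W[t] = rol32(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1)
 *
 * Since W(x) indexes the workspace modulo 16, the offsets used by
 * SHA_MIX below address exactly those entries: t+13 == t-3,
 * t+8 == t-8, t+2 == t-14 and t == t-16 (mod 16), so the schedule is
 * computed in place in the rolling 16-word window.
 */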
#define SHA_SRC(t) get_unaligned_be32((__u32 *)data + t)
#define SHA_MIX(t) rol32(W(t+13) ^ W(t+8) ^ W(t+2) ^ W(t), 1)

#define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \
	__u32 TEMP = input(t); setW(t, TEMP); \
	E += TEMP + rol32(A,5) + (fn) + (constant); \
	B = ror32(B, 2); \
	TEMP = E; E = D; D = C; C = B; B = A; A = TEMP; } while (0)

#define T_0_15(t, A, B, C, D, E)  SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
#define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
#define T_20_39(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0x6ed9eba1, A, B, C, D, E )
#define T_40_59(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, ((B&C)+(D&(B^C))) , 0x8f1bbcdc, A, B, C, D, E )
#define T_60_79(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0xca62c1d6, A, B, C, D, E )
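/*
 * Each SHA_ROUND performs one SHA-1 step: E += W[t] + rol32(A, 5) +
 * f(B, C, D) + K, B is rotated right by 2 (i.e. rol32(B, 30)), and the
 * five working variables are then renamed so the freshly computed value
 * becomes the new A.  The round functions above are the standard SHA-1
 * ones in branch-free form: (((C^D)&B)^D) is the choice function
 * Ch(B, C, D), and ((B&C)+(D&(B^C))) is the majority function
 * Maj(B, C, D) - its two terms never have a set bit in common, so the
 * addition cannot carry and behaves like an OR.
 */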
/**
 * sha1_transform - single block SHA1 transform (deprecated)
 *
 * @digest: 160 bit digest to update
 * @data:   512 bits of data to hash
 * @array:  16 words of workspace (see note)
 *
 * This function executes SHA-1's internal compression function.  It updates the
 * 160-bit internal state (@digest) with a single 512-bit data block (@data).
 *
 * Don't use this function.  SHA-1 is no longer considered secure.  And even if
 * you do have to use SHA-1, this isn't the correct way to hash something with
 * SHA-1 as this doesn't handle padding and finalization.  (An illustrative
 * single-block usage sketch appears at the end of this file.)
 *
 * Note: If the hash is security sensitive, the caller should be sure
 * to clear the workspace. This is left to the caller to avoid
 * unnecessary clears between chained hashing operations.
 */
void sha1_transform(__u32 *digest, const char *data, __u32 *array)
{
	__u32 A, B, C, D, E;
	unsigned int i = 0;

	A = digest[0];
	B = digest[1];
	C = digest[2];
	D = digest[3];
	E = digest[4];

	/* Round 1 - iterations 0-15 take their input from 'data' */
	for (; i < 16; ++i)
		T_0_15(i, A, B, C, D, E);

	/* Round 1 - tail.  Input from 512-bit mixing array */
	for (; i < 20; ++i)
		T_16_19(i, A, B, C, D, E);

	/* Round 2 */
	for (; i < 40; ++i)
		T_20_39(i, A, B, C, D, E);

	/* Round 3 */
	for (; i < 60; ++i)
		T_40_59(i, A, B, C, D, E);

	/* Round 4 */
	for (; i < 80; ++i)
		T_60_79(i, A, B, C, D, E);

	digest[0] += A;
	digest[1] += B;
	digest[2] += C;
	digest[3] += D;
	digest[4] += E;
}
EXPORT_SYMBOL(sha1_transform);

/**
 * sha1_init - initialize the vectors for a SHA1 digest
 * @buf: vector to initialize
 */
void sha1_init(__u32 *buf)
{
	buf[0] = 0x67452301;
	buf[1] = 0xefcdab89;
	buf[2] = 0x98badcfe;
	buf[3] = 0x10325476;
	buf[4] = 0xc3d2e1f0;
}
EXPORT_SYMBOL(sha1_init);
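/*
 * Illustrative sketch of single-block use (assumes the SHA1_BLOCK_SIZE,
 * SHA1_DIGEST_WORDS and SHA1_WORKSPACE_WORDS constants from
 * <crypto/sha.h>).  The caller must do the SHA-1 padding itself: append
 * 0x80, zero-fill, then the 64-bit big-endian bit length in the last
 * eight bytes of the block.
 *
 *	__u32 digest[SHA1_DIGEST_WORDS];
 *	__u32 ws[SHA1_WORKSPACE_WORDS];
 *	char block[SHA1_BLOCK_SIZE] = { 0 };
 *	const char *msg = "abc";
 *	size_t len = strlen(msg);		// 3 bytes, 24 bits
 *
 *	memcpy(block, msg, len);
 *	block[len] = (char)0x80;		// padding marker
 *	block[SHA1_BLOCK_SIZE - 1] = len * 8;	// bit length fits in one byte here
 *
 *	sha1_init(digest);
 *	sha1_transform(digest, block, ws);
 *	memzero_explicit(ws, sizeof(ws));	// clear workspace if data is sensitive
 *
 * digest[] now holds the five 32-bit SHA-1 state words; the byte-wise
 * digest is their big-endian encoding.
 */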