// SPDX-License-Identifier: GPL-2.0
/*
 * SHA1 routine optimized to do word accesses rather than byte accesses,
 * and to avoid unnecessary copies into the context array.
 *
 * This was based on the git SHA1 implementation.
 */

#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/string.h>
#include <crypto/sha1.h>
#include <asm/unaligned.h>

/*
 * If you have 32 registers or more, the compiler can (and should)
 * try to change the array[] accesses into registers. However, on
 * machines with fewer than ~25 registers, that won't really work,
 * and at least gcc will make an unholy mess of it.
 *
 * So to avoid that mess which just slows things down, we force
 * the stores to memory to actually happen (we might be better off
 * with a 'W(t)=(val);asm("":"+m" (W(t)))' there instead, as
 * suggested by Artur Skawina - that will also make gcc unable to
 * try to do the silly "optimize away loads" part because it won't
 * see what the value will be).
 *
 * Ben Herrenschmidt reports that on PPC, the C version comes close
 * to the optimized asm with this (ie on PPC you don't want that
 * 'volatile', since there are lots of registers).
 *
 * On ARM we get the best code generation by forcing a full memory barrier
 * between each SHA_ROUND, otherwise gcc happily gets wild with spilling and
 * the stack frame size simply explodes and performance goes down the drain.
 */

#ifdef CONFIG_X86
  #define setW(x, val) (*(volatile __u32 *)&W(x) = (val))
#elif defined(CONFIG_ARM)
  #define setW(x, val) do { W(x) = (val); __asm__("":::"memory"); } while (0)
#else
  #define setW(x, val) (W(x) = (val))
#endif

/* This "rolls" over the 512-bit array */
#define W(x) (array[(x)&15])

/*
 * Where do we get the source from? The first 16 iterations get it from
 * the input data, the rest mix it from the 512-bit array.
 */
#define SHA_SRC(t) get_unaligned_be32((__u32 *)data + t)
#define SHA_MIX(t) rol32(W(t+13) ^ W(t+8) ^ W(t+2) ^ W(t), 1)

#define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \
	__u32 TEMP = input(t); setW(t, TEMP); \
	E += TEMP + rol32(A,5) + (fn) + (constant); \
	B = ror32(B, 2); \
	TEMP = E; E = D; D = C; C = B; B = A; A = TEMP; } while (0)

#define T_0_15(t, A, B, C, D, E)  SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
#define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
#define T_20_39(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0x6ed9eba1, A, B, C, D, E )
#define T_40_59(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, ((B&C)+(D&(B^C))) , 0x8f1bbcdc, A, B, C, D, E )
#define T_60_79(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0xca62c1d6, A, B, C, D, E )

/**
 * sha1_transform - single block SHA1 transform (deprecated)
 *
 * @digest: 160 bit digest to update
 * @data: 512 bits of data to hash
 * @array: 16 words of workspace (see note)
 *
 * This function executes SHA-1's internal compression function. It updates the
 * 160-bit internal state (@digest) with a single 512-bit data block (@data).
 *
 * Don't use this function. SHA-1 is no longer considered secure. And even if
 * you do have to use SHA-1, this isn't the correct way to hash something with
 * SHA-1 as this doesn't handle padding and finalization.
 *
 * Note: If the hash is security sensitive, the caller should be sure
 * to clear the workspace. This is left to the caller to avoid
 * unnecessary clears between chained hashing operations.
 */
void sha1_transform(__u32 *digest, const char *data, __u32 *array)
{
	__u32 A, B, C, D, E;
	unsigned int i = 0;

	A = digest[0];
	B = digest[1];
	C = digest[2];
	D = digest[3];
	E = digest[4];

	/* Round 1 - iterations 0-15 take their input from 'data' */
	for (; i < 16; ++i)
		T_0_15(i, A, B, C, D, E);

	/* Round 1 - tail. Input from 512-bit mixing array */
	for (; i < 20; ++i)
		T_16_19(i, A, B, C, D, E);

	/* Round 2 */
	for (; i < 40; ++i)
		T_20_39(i, A, B, C, D, E);

	/* Round 3 */
	for (; i < 60; ++i)
		T_40_59(i, A, B, C, D, E);

	/* Round 4 */
	for (; i < 80; ++i)
		T_60_79(i, A, B, C, D, E);

	digest[0] += A;
	digest[1] += B;
	digest[2] += C;
	digest[3] += D;
	digest[4] += E;
}
EXPORT_SYMBOL(sha1_transform);

/**
 * sha1_init - initialize the vectors for a SHA1 digest
 * @buf: vector to initialize
 */
void sha1_init(__u32 *buf)
{
	buf[0] = 0x67452301;
	buf[1] = 0xefcdab89;
	buf[2] = 0x98badcfe;
	buf[3] = 0x10325476;
	buf[4] = 0xc3d2e1f0;
}
EXPORT_SYMBOL(sha1_init);

MODULE_LICENSE("GPL");
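
/*
 * Illustrative usage sketch: one way a caller could drive sha1_init() and
 * sha1_transform() above for a single, already-padded 64-byte block. The
 * helper name below is hypothetical and exists only for illustration; real
 * callers must perform SHA-1 padding and finalization themselves, and should
 * clear the workspace when the data is security sensitive, as noted in the
 * sha1_transform() kerneldoc.
 */
static void __maybe_unused sha1_hash_one_block(const char *block, __u32 *digest)
{
	__u32 workspace[SHA1_WORKSPACE_WORDS];

	sha1_init(digest);				/* digest: SHA1_DIGEST_WORDS words */
	sha1_transform(digest, block, workspace);	/* block: SHA1_BLOCK_SIZE bytes */
	memzero_explicit(workspace, sizeof(workspace));	/* wipe the round state */
}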