/*
 * Intel SHA Extensions optimized implementation of a SHA-1 update function
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Contact Information:
 * 	Sean Gulley <sean.m.gulley@intel.com>
 * 	Tim Chen <tim.c.chen@linux.intel.com>
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 	* Redistributions of source code must retain the above copyright
 * 	  notice, this list of conditions and the following disclaimer.
 * 	* Redistributions in binary form must reproduce the above copyright
 * 	  notice, this list of conditions and the following disclaimer in
 * 	  the documentation and/or other materials provided with the
 * 	  distribution.
 * 	* Neither the name of Intel Corporation nor the names of its
 * 	  contributors may be used to endorse or promote products derived
 * 	  from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/linkage.h>

#define DIGEST_PTR	%rdi	/* 1st arg */
#define DATA_PTR	%rsi	/* 2nd arg */
#define NUM_BLKS	%rdx	/* 3rd arg */
#define NUM_BLKS_32	%edx	/* low 32 bits of NUM_BLKS */

/* gcc conversion */
#define FRAME_SIZE	32	/* space for 2x16 bytes */

#define ABCD		%xmm0
#define E0		%xmm1	/* Need two E's b/c they ping pong */
#define E1		%xmm2
#define MSG0		%xmm3
#define MSG1		%xmm4
#define MSG2		%xmm5
#define MSG3		%xmm6
#define SHUF_MASK	%xmm7


/*
 * Intel SHA Extensions optimized implementation of a SHA-1 update function
 *
 * The function takes a pointer to the current hash values, a pointer to the
 * input data, and a number of 64 byte blocks to process.  Once all blocks have
 * been processed, the memory behind the digest pointer is updated in place
 * with the resulting hash value.  The function only processes complete
 * blocks, there is no functionality to store partial blocks.  All message
 * padding and hash value initialization must be done outside the update
 * function.
 *
 * If numBlocks is zero, the function returns without reading or writing the
 * digest.
 *
 * The digest is accessed as 16 bytes at offset 0 (the A, B, C, D words)
 * followed by 4 bytes at offset 16 (the E word).  Digest and data are only
 * accessed with instructions that tolerate unaligned addresses.
 *
 * The indented lines in the loop are instructions related to rounds processing.
 * The non-indented lines are instructions related to the message schedule.
 *
 * void sha1_ni_transform(uint32_t *digest, const void *data,
 *			  uint32_t numBlocks)
 * digest : pointer to digest
 * data: pointer to input data
 * numBlocks: Number of blocks to process
 *
 * Clobbers: DATA_PTR, NUM_BLKS, %xmm0-%xmm7 and the flags.
 */
.text
.align 32
SYM_FUNC_START(sha1_ni_transform)
	/*
	 * Standard frame-pointer prologue.  %rbp keeps the caller's %rsp
	 * across the realignment below, which is a form objtool/ORC can
	 * follow (a copy of %rsp in a scratch register is not).
	 */
	push		%rbp
	mov		%rsp, %rbp
	sub		$FRAME_SIZE, %rsp
	and		$~0xF, %rsp		/* movdqa below needs 16-byte alignment */

	/*
	 * numBlocks is a 32-bit argument; the upper half of the register it
	 * arrives in is not guaranteed to be zero.  Writing the 32-bit
	 * register zero-extends it so the 64-bit arithmetic below is exact.
	 */
	mov		NUM_BLKS_32, NUM_BLKS_32
	shl		$6, NUM_BLKS		/* convert to bytes */
	jz		.Ldone_hash
	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */

	/* load initial hash values */
	pinsrd		$3, 1*16(DIGEST_PTR), E0
	movdqu		0*16(DIGEST_PTR), ABCD
	pand		UPPER_WORD_MASK(%rip), E0	/* keep only E in the top dword */
	pshufd		$0x1B, ABCD, ABCD		/* reverse the order of the 4 dwords */

	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK

.Lloop0:
	/* Save hash values for addition after rounds */
	movdqa		E0, (0*16)(%rsp)
	movdqa		ABCD, (1*16)(%rsp)

	/*
	 * The immediate of sha1rnds4 (0..3) selects the round function and
	 * constant; it advances once every 20 rounds.
	 */

	/* Rounds 0-3 */
	movdqu		0*16(DATA_PTR), MSG0
	pshufb		SHUF_MASK, MSG0
		paddd		MSG0, E0
		movdqa		ABCD, E1
		sha1rnds4	$0, E0, ABCD

	/* Rounds 4-7 */
	movdqu		1*16(DATA_PTR), MSG1
	pshufb		SHUF_MASK, MSG1
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
		sha1rnds4	$0, E1, ABCD
	sha1msg1	MSG1, MSG0

	/* Rounds 8-11 */
	movdqu		2*16(DATA_PTR), MSG2
	pshufb		SHUF_MASK, MSG2
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
		sha1rnds4	$0, E0, ABCD
	sha1msg1	MSG2, MSG1
	pxor		MSG2, MSG0

	/* Rounds 12-15 */
	movdqu		3*16(DATA_PTR), MSG3
	pshufb		SHUF_MASK, MSG3
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
	sha1msg2	MSG3, MSG0
		sha1rnds4	$0, E1, ABCD
	sha1msg1	MSG3, MSG2
	pxor		MSG3, MSG1

	/* Rounds 16-19 */
		sha1nexte	MSG0, E0
		movdqa		ABCD, E1
	sha1msg2	MSG0, MSG1
		sha1rnds4	$0, E0, ABCD
	sha1msg1	MSG0, MSG3
	pxor		MSG0, MSG2

	/* Rounds 20-23 */
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
	sha1msg2	MSG1, MSG2
		sha1rnds4	$1, E1, ABCD
	sha1msg1	MSG1, MSG0
	pxor		MSG1, MSG3

	/* Rounds 24-27 */
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
	sha1msg2	MSG2, MSG3
		sha1rnds4	$1, E0, ABCD
	sha1msg1	MSG2, MSG1
	pxor		MSG2, MSG0

	/* Rounds 28-31 */
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
	sha1msg2	MSG3, MSG0
		sha1rnds4	$1, E1, ABCD
	sha1msg1	MSG3, MSG2
	pxor		MSG3, MSG1

	/* Rounds 32-35 */
		sha1nexte	MSG0, E0
		movdqa		ABCD, E1
	sha1msg2	MSG0, MSG1
		sha1rnds4	$1, E0, ABCD
	sha1msg1	MSG0, MSG3
	pxor		MSG0, MSG2

	/* Rounds 36-39 */
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
	sha1msg2	MSG1, MSG2
		sha1rnds4	$1, E1, ABCD
	sha1msg1	MSG1, MSG0
	pxor		MSG1, MSG3

	/* Rounds 40-43 */
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
	sha1msg2	MSG2, MSG3
		sha1rnds4	$2, E0, ABCD
	sha1msg1	MSG2, MSG1
	pxor		MSG2, MSG0

	/* Rounds 44-47 */
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
	sha1msg2	MSG3, MSG0
		sha1rnds4	$2, E1, ABCD
	sha1msg1	MSG3, MSG2
	pxor		MSG3, MSG1

	/* Rounds 48-51 */
		sha1nexte	MSG0, E0
		movdqa		ABCD, E1
	sha1msg2	MSG0, MSG1
		sha1rnds4	$2, E0, ABCD
	sha1msg1	MSG0, MSG3
	pxor		MSG0, MSG2

	/* Rounds 52-55 */
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
	sha1msg2	MSG1, MSG2
		sha1rnds4	$2, E1, ABCD
	sha1msg1	MSG1, MSG0
	pxor		MSG1, MSG3

	/* Rounds 56-59 */
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
	sha1msg2	MSG2, MSG3
		sha1rnds4	$2, E0, ABCD
	sha1msg1	MSG2, MSG1
	pxor		MSG2, MSG0

	/* Rounds 60-63 */
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
	sha1msg2	MSG3, MSG0
		sha1rnds4	$3, E1, ABCD
	sha1msg1	MSG3, MSG2
	pxor		MSG3, MSG1

	/* Rounds 64-67 */
		sha1nexte	MSG0, E0
		movdqa		ABCD, E1
	sha1msg2	MSG0, MSG1
		sha1rnds4	$3, E0, ABCD
	sha1msg1	MSG0, MSG3
	pxor		MSG0, MSG2

	/* Rounds 68-71 */
		sha1nexte	MSG1, E1
		movdqa		ABCD, E0
	sha1msg2	MSG1, MSG2
		sha1rnds4	$3, E1, ABCD
	pxor		MSG1, MSG3

	/* Rounds 72-75 */
		sha1nexte	MSG2, E0
		movdqa		ABCD, E1
	sha1msg2	MSG2, MSG3
		sha1rnds4	$3, E0, ABCD

	/* Rounds 76-79 */
		sha1nexte	MSG3, E1
		movdqa		ABCD, E0
		sha1rnds4	$3, E1, ABCD

	/* Add current hash values with previously saved */
	sha1nexte	(0*16)(%rsp), E0
	paddd		(1*16)(%rsp), ABCD

	/* Increment data pointer and loop if more to process */
	add		$64, DATA_PTR
	cmp		NUM_BLKS, DATA_PTR
	jne		.Lloop0

	/* Write hash values back in the correct order */
	pshufd		$0x1B, ABCD, ABCD
	movdqu		ABCD, 0*16(DIGEST_PTR)
	pextrd		$3, E0, 1*16(DIGEST_PTR)

.Ldone_hash:
	/* Drop the aligned scratch frame and restore the caller's %rbp */
	mov		%rbp, %rsp
	pop		%rbp

	RET
SYM_FUNC_END(sha1_ni_transform)

/* pshufb control that reverses all 16 bytes of a register */
.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
	.octa 0x000102030405060708090a0b0c0d0e0f

/* Keeps only the most significant dword of a register */
.section	.rodata.cst16.UPPER_WORD_MASK, "aM", @progbits, 16
.align 16
UPPER_WORD_MASK:
	.octa 0xFFFFFFFF000000000000000000000000