/*
 * Intel SHA Extensions optimized implementation of a SHA-256 update function
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * Contact Information:
 *	Sean Gulley <sean.m.gulley@intel.com>
 *	Tim Chen <tim.c.chen@linux.intel.com>
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *	* Redistributions of source code must retain the above copyright
 *	  notice, this list of conditions and the following disclaimer.
 *	* Redistributions in binary form must reproduce the above copyright
 *	  notice, this list of conditions and the following disclaimer in
 *	  the documentation and/or other materials provided with the
 *	  distribution.
 *	* Neither the name of Intel Corporation nor the names of its
 *	  contributors may be used to endorse or promote products derived
 *	  from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/linkage.h>

#define DIGEST_PTR	%rdi	/* 1st arg */
#define DATA_PTR	%rsi	/* 2nd arg */
#define NUM_BLKS	%rdx	/* 3rd arg */
#define NUM_BLKS_32	%edx	/* low 32 bits of the 3rd arg */

#define SHA256CONSTANTS	%rax

/* MSG must stay in %xmm0: it is the implicit third operand of sha256rnds2 */
#define MSG		%xmm0
#define STATE0		%xmm1
#define STATE1		%xmm2
#define MSGTMP0		%xmm3
#define MSGTMP1		%xmm4
#define MSGTMP2		%xmm5
#define MSGTMP3		%xmm6
#define MSGTMP4		%xmm7

#define SHUF_MASK	%xmm8

#define ABEF_SAVE	%xmm9
#define CDGH_SAVE	%xmm10

/*
 * Intel SHA Extensions optimized implementation of a SHA-256 update function
 *
 * The function takes a pointer to the current hash values, a pointer to the
 * input data, and a number of 64 byte blocks to process.  Once all blocks have
 * been processed, the buffer behind the digest pointer is updated with the
 * resulting hash value.  The function only processes complete blocks, there is
 * no functionality to store partial blocks.  All message padding and hash
 * value initialization must be done outside the update function.
 *
 * If numBlocks is zero the function returns immediately and the digest is
 * left untouched.
 *
 * numBlocks is a 32-bit quantity.  The calling convention does not guarantee
 * that the upper half of the 64-bit argument register is zero, so the count
 * is explicitly zero-extended before it is turned into a byte length and an
 * end-of-data pointer.
 *
 * Registers written: %rax, %rdx, %rsi, %xmm0-%xmm10 and the flags.
 *
 * The indented lines in the loop are instructions related to rounds processing.
 * The non-indented lines are instructions related to the message schedule.
 *
 * void sha256_ni_transform(uint32_t *digest, const void *data,
 *			    uint32_t numBlocks);
 * digest : pointer to digest
 * data: pointer to input data
 * numBlocks: Number of blocks to process
 */

.text
.align 32
SYM_FUNC_START(sha256_ni_transform)

	/* discard whatever the caller left in bits 63:32 of the count */
	mov		NUM_BLKS_32, NUM_BLKS_32
	shl		$6, NUM_BLKS		/*  convert to bytes */
	jz		.Ldone_hash
	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */

	/*
	 * load initial hash values
	 * Need to reorder these appropriately
	 * DCBA, HGFE -> ABEF, CDGH
	 */
	movdqu		0*16(DIGEST_PTR), STATE0
	movdqu		1*16(DIGEST_PTR), STATE1

	pshufd		$0xB1, STATE0,  STATE0		/* CDAB */
	pshufd		$0x1B, STATE1,  STATE1		/* EFGH */
	movdqa		STATE0, MSGTMP4
	palignr		$8, STATE1,  STATE0		/* ABEF */
	pblendw		$0xF0, MSGTMP4, STATE1		/* CDGH */

	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
	lea		K256(%rip), SHA256CONSTANTS

.Lloop0:
	/* Save hash values for addition after rounds */
	movdqa		STATE0, ABEF_SAVE
	movdqa		STATE1, CDGH_SAVE

	/*
	 * Each group below handles four rounds: MSG receives four message
	 * dwords plus their round constants, the first sha256rnds2 consumes
	 * the low two dwords of MSG, pshufd $0x0E moves the high two dwords
	 * down, and the second sha256rnds2 consumes those.
	 */

	/* Rounds 0-3 */
	movdqu		0*16(DATA_PTR), MSG
	pshufb		SHUF_MASK, MSG
	movdqa		MSG, MSGTMP0
		paddd		0*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0

	/* Rounds 4-7 */
	movdqu		1*16(DATA_PTR), MSG
	pshufb		SHUF_MASK, MSG
	movdqa		MSG, MSGTMP1
		paddd		1*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP1, MSGTMP0

	/* Rounds 8-11 */
	movdqu		2*16(DATA_PTR), MSG
	pshufb		SHUF_MASK, MSG
	movdqa		MSG, MSGTMP2
		paddd		2*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP2, MSGTMP1

	/* Rounds 12-15 */
	movdqu		3*16(DATA_PTR), MSG
	pshufb		SHUF_MASK, MSG
	movdqa		MSG, MSGTMP3
		paddd		3*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP3, MSGTMP4
	palignr		$4, MSGTMP2, MSGTMP4
	paddd		MSGTMP4, MSGTMP0
	sha256msg2	MSGTMP3, MSGTMP0
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP3, MSGTMP2

	/* Rounds 16-19 */
	movdqa		MSGTMP0, MSG
		paddd		4*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP0, MSGTMP4
	palignr		$4, MSGTMP3, MSGTMP4
	paddd		MSGTMP4, MSGTMP1
	sha256msg2	MSGTMP0, MSGTMP1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP0, MSGTMP3

	/* Rounds 20-23 */
	movdqa		MSGTMP1, MSG
		paddd		5*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP1, MSGTMP4
	palignr		$4, MSGTMP0, MSGTMP4
	paddd		MSGTMP4, MSGTMP2
	sha256msg2	MSGTMP1, MSGTMP2
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP1, MSGTMP0

	/* Rounds 24-27 */
	movdqa		MSGTMP2, MSG
		paddd		6*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP2, MSGTMP4
	palignr		$4, MSGTMP1, MSGTMP4
	paddd		MSGTMP4, MSGTMP3
	sha256msg2	MSGTMP2, MSGTMP3
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP2, MSGTMP1

	/* Rounds 28-31 */
	movdqa		MSGTMP3, MSG
		paddd		7*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP3, MSGTMP4
	palignr		$4, MSGTMP2, MSGTMP4
	paddd		MSGTMP4, MSGTMP0
	sha256msg2	MSGTMP3, MSGTMP0
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP3, MSGTMP2

	/* Rounds 32-35 */
	movdqa		MSGTMP0, MSG
		paddd		8*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP0, MSGTMP4
	palignr		$4, MSGTMP3, MSGTMP4
	paddd		MSGTMP4, MSGTMP1
	sha256msg2	MSGTMP0, MSGTMP1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP0, MSGTMP3

	/* Rounds 36-39 */
	movdqa		MSGTMP1, MSG
		paddd		9*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP1, MSGTMP4
	palignr		$4, MSGTMP0, MSGTMP4
	paddd		MSGTMP4, MSGTMP2
	sha256msg2	MSGTMP1, MSGTMP2
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP1, MSGTMP0

	/* Rounds 40-43 */
	movdqa		MSGTMP2, MSG
		paddd		10*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP2, MSGTMP4
	palignr		$4, MSGTMP1, MSGTMP4
	paddd		MSGTMP4, MSGTMP3
	sha256msg2	MSGTMP2, MSGTMP3
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP2, MSGTMP1

	/* Rounds 44-47 */
	movdqa		MSGTMP3, MSG
		paddd		11*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP3, MSGTMP4
	palignr		$4, MSGTMP2, MSGTMP4
	paddd		MSGTMP4, MSGTMP0
	sha256msg2	MSGTMP3, MSGTMP0
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP3, MSGTMP2

	/* Rounds 48-51 */
	movdqa		MSGTMP0, MSG
		paddd		12*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP0, MSGTMP4
	palignr		$4, MSGTMP3, MSGTMP4
	paddd		MSGTMP4, MSGTMP1
	sha256msg2	MSGTMP0, MSGTMP1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0
	sha256msg1	MSGTMP0, MSGTMP3

	/* Rounds 52-55 */
	movdqa		MSGTMP1, MSG
		paddd		13*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP1, MSGTMP4
	palignr		$4, MSGTMP0, MSGTMP4
	paddd		MSGTMP4, MSGTMP2
	sha256msg2	MSGTMP1, MSGTMP2
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0

	/* Rounds 56-59 */
	movdqa		MSGTMP2, MSG
		paddd		14*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
	movdqa		MSGTMP2, MSGTMP4
	palignr		$4, MSGTMP1, MSGTMP4
	paddd		MSGTMP4, MSGTMP3
	sha256msg2	MSGTMP2, MSGTMP3
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0

	/* Rounds 60-63 */
	movdqa		MSGTMP3, MSG
		paddd		15*16(SHA256CONSTANTS), MSG
		sha256rnds2	STATE0, STATE1
		pshufd		$0x0E, MSG, MSG
		sha256rnds2	STATE1, STATE0

	/* Add current hash values with previously saved */
	paddd		ABEF_SAVE, STATE0
	paddd		CDGH_SAVE, STATE1

	/* Increment data pointer and loop if more to process */
	add		$64, DATA_PTR
	cmp		NUM_BLKS, DATA_PTR	/* NUM_BLKS holds the end pointer */
	jne		.Lloop0

	/* Write hash values back in the correct order */
	pshufd		$0x1B, STATE0,  STATE0		/* FEBA */
	pshufd		$0xB1, STATE1,  STATE1		/* DCHG */
	movdqa		STATE0, MSGTMP4
	pblendw		$0xF0, STATE1,  STATE0		/* DCBA */
	palignr		$8, MSGTMP4, STATE1		/* HGFE */

	movdqu		STATE0, 0*16(DIGEST_PTR)
	movdqu		STATE1, 1*16(DIGEST_PTR)

.Ldone_hash:

	RET
SYM_FUNC_END(sha256_ni_transform)

/* SHA-256 round constants, four per 16-byte row (one row per round group) */
.section	.rodata.cst256.K256, "aM", @progbits, 256
.align 64
K256:
	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

/* pshufb control that reverses the byte order within each 32-bit dword */
.section	.rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
PSHUFFLE_BYTE_FLIP_MASK:
	.octa 0x0c0d0e0f08090a0b0405060700010203