/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Fast AES implementation for SPE instruction set (PPC)
 *
 * This code makes use of the SPE SIMD instruction set as defined in
 * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
 * Implementation is based on optimization guide notes from
 * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 */

#include <asm/ppc_asm.h>
#include "aes-spe-regs.h"

/*
 * EAD: insert one byte of "in" (selected by bpos through the rotate count)
 * into bits 20-27 of rT0. All other bits of rT0 are preserved, so rT0 must
 * already hold the table base. The byte lands shifted left by 4, i.e. it is
 * used as a 16 byte scaled offset.
 */
#define EAD(in, bpos) \
	rlwimi		rT0,in,28-((bpos+3)%4)*8,20,27;

/*
 * DAD: insert one byte of "in" (selected by bpos) into bits 24-31 of rT1.
 * All other bits of rT1 are preserved, so rT1 must already hold the table
 * base. The byte is used as an unscaled offset.
 */
#define DAD(in, bpos) \
	rlwimi		rT1,in,24-((bpos+3)%4)*8,24,31;

#define LWH(out, off) \
	evlwwsplat	out,off(rT0);	/* load word high */

#define LWL(out, off) \
	lwz		out,off(rT0);	/* load word low */

#define LBZ(out, tab, off) \
	lbz		out,off(tab);	/* load byte */

#define LAH(out, in, bpos, off) \
	EAD(in, bpos)			/* calc addr + load word high */ \
	LWH(out, off)

#define LAL(out, in, bpos, off) \
	EAD(in, bpos)			/* calc addr + load word low */ \
	LWL(out, off)

#define LAE(out, in, bpos) \
	EAD(in, bpos)			/* calc addr + load enc byte */ \
	LBZ(out, rT0, 8)

#define LBE(out) \
	LBZ(out, rT0, 8)		/* load enc byte */

#define LAD(out, in, bpos) \
	DAD(in, bpos)			/* calc addr + load dec byte */ \
	LBZ(out, rT1, 0)

#define LBD(out) \
	LBZ(out, rT1, 0)		/* load dec byte */

/*
 * ppc_encrypt_block: The central encryption function for a single 16 bytes
 * block. It does no stack handling or register saving to support fast calls
 * via bl/blr. It expects that caller has pre-xored input data with first
 * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
 * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
 * and rW0-rW3 and caller must execute a final xor on the output registers.
 * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
 *
 * Each pass through the loop consumes two 16 byte key blocks (offsets 16/24
 * and 32/40 from rKP) and advances rKP by 32. CTR holds the number of passes.
 * rT0 is modified by every table lookup (only bits 20-27 change).
 */
_GLOBAL(ppc_encrypt_block)
	/* lookups that the loop body expects to be already in flight */
	LAH(rW4, rD1, 2, 4)
	LAH(rW6, rD0, 3, 0)
	LAH(rW3, rD0, 1, 8)
ppc_encrypt_block_loop:
	/* first key block of this pass: 16(rKP) and 24(rKP) */
	LAH(rW0, rD3, 0, 12)
	LAL(rW0, rD0, 0, 12)
	LAH(rW1, rD1, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAL(rW3, rD1, 1, 8)
	LAL(rW4, rD2, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD3, 2, 4)
	LAL(rW5, rD0, 2, 4)
	LAH(rW7, rD2, 3, 0)
	evldw		rD1,16(rKP)
	EAD(rD3, 3)
	evxor		rW2,rW2,rW4
	LWL(rW7, 0)
	evxor		rW2,rW2,rW6
	EAD(rD2, 0)
	evxor		rD1,rD1,rW2
	LWL(rW1, 12)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 2)
	evxor		rW3,rW3,rW5
	LWH(rW4, 4)
	evxor		rW3,rW3,rW7
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW1
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	/* second key block of this pass: 32(rKP) and 40(rKP) */
	LAH(rW0, rD3, 0, 12)
	LAL(rW0, rD0, 0, 12)
	LAH(rW1, rD1, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAL(rW3, rD1, 1, 8)
	LAL(rW4, rD2, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD3, 2, 4)
	LAL(rW5, rD0, 2, 4)
	LAH(rW7, rD2, 3, 0)
	evldw		rD1,32(rKP)
	EAD(rD3, 3)
	evxor		rW2,rW2,rW4
	LWL(rW7, 0)
	evxor		rW2,rW2,rW6
	EAD(rD2, 0)
	evxor		rD1,rD1,rW2
	LWL(rW1, 12)
	evxor		rD1,rD1,rW0
	evldw		rD3,40(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 2)
	evxor		rW3,rW3,rW5
	LWH(rW4, 4)
	evxor		rW3,rW3,rW7
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW1
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	addi		rKP,rKP,32
	bdnz		ppc_encrypt_block_loop
	/* after the loop: one more word-table step with 16(rKP)/24(rKP) ... */
	LAH(rW0, rD3, 0, 12)
	LAL(rW0, rD0, 0, 12)
	LAH(rW1, rD1, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAL(rW3, rD1, 1, 8)
	LAL(rW4, rD2, 2, 4)
	LAH(rW5, rD3, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAL(rW5, rD0, 2, 4)
	LAH(rW7, rD2, 3, 0)
	evldw		rD1,16(rKP)
	EAD(rD3, 3)
	evxor		rW2,rW2,rW4
	LWL(rW7, 0)
	evxor		rW2,rW2,rW6
	EAD(rD2, 0)
	evxor		rD1,rD1,rW2
	LWL(rW1, 12)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	/* ... interleaved with the first single byte lookups (offset 8) */
	EAD(rD1, 0)
	evxor		rW3,rW3,rW5
	LBE(rW2)
	evxor		rW3,rW3,rW7
	EAD(rD0, 1)
	evxor		rD3,rD3,rW3
	LBE(rW6)
	evxor		rD3,rD3,rW1
	EAD(rD0, 0)
	evmergehi	rD2,rD2,rD3
	LBE(rW1)
	/*
	 * Byte-wise step: each output word rW0-rW3 is assembled from four
	 * single byte lookups, merged into place with rlwimi. Meanwhile
	 * rD0-rD3 are reloaded from 32..44(rKP) for the caller's final xor.
	 */
	LAE(rW0, rD3, 0)
	LAE(rW1, rD0, 0)
	LAE(rW4, rD2, 1)
	LAE(rW5, rD3, 1)
	LAE(rW3, rD2, 0)
	LAE(rW7, rD1, 1)
	rlwimi		rW0,rW4,8,16,23
	rlwimi		rW1,rW5,8,16,23
	LAE(rW4, rD1, 2)
	LAE(rW5, rD2, 2)
	rlwimi		rW2,rW6,8,16,23
	rlwimi		rW3,rW7,8,16,23
	LAE(rW6, rD3, 2)
	LAE(rW7, rD0, 2)
	rlwimi		rW0,rW4,16,8,15
	rlwimi		rW1,rW5,16,8,15
	LAE(rW4, rD0, 3)
	LAE(rW5, rD1, 3)
	rlwimi		rW2,rW6,16,8,15
	lwz		rD0,32(rKP)
	rlwimi		rW3,rW7,16,8,15
	lwz		rD1,36(rKP)
	LAE(rW6, rD2, 3)
	LAE(rW7, rD3, 3)
	rlwimi		rW0,rW4,24,0,7
	lwz		rD2,40(rKP)
	rlwimi		rW1,rW5,24,0,7
	lwz		rD3,44(rKP)
	rlwimi		rW2,rW6,24,0,7
	rlwimi		rW3,rW7,24,0,7
	blr

/*
 * ppc_decrypt_block: The central decryption function for a single 16 bytes
 * block. It does no stack handling or register saving to support fast calls
 * via bl/blr. It expects that caller has pre-xored input data with first
 * 4 words of the key into rD0-rD3 (NOTE(review): presumably the decryption
 * key schedule - confirm against the caller). Pointer/counter registers must
 * have also been set up before (rT0, rT1, rKP, CTR): rT0 is the base for the
 * word lookups in the loop, rT1 is the base for the single byte lookups
 * (DAD/LBD/LAD) after the loop. Output is stored in rD0-rD3 and rW0-rW3 and
 * caller must execute a final xor on the output registers.
 * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
 *
 * Each pass through the loop consumes two 16 byte key blocks (offsets 16/24
 * and 32/40 from rKP) and advances rKP by 32. CTR holds the number of passes.
 * rT0 and rT1 are modified by the table lookups (only their index bits).
 */
_GLOBAL(ppc_decrypt_block)
	/* lookups that the loop body expects to be already in flight */
	LAH(rW0, rD1, 0, 12)
	LAH(rW6, rD0, 3, 0)
	LAH(rW3, rD0, 1, 8)
ppc_decrypt_block_loop:
	/* first key block of this pass: 16(rKP) and 24(rKP) */
	LAH(rW1, rD3, 0, 12)
	LAL(rW0, rD2, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAH(rW4, rD3, 2, 4)
	LAL(rW4, rD0, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD1, 2, 4)
	LAH(rW7, rD2, 3, 0)
	LAL(rW7, rD3, 3, 0)
	LAL(rW3, rD1, 1, 8)
	evldw		rD1,16(rKP)
	EAD(rD0, 0)
	evxor		rW4,rW4,rW6
	LWL(rW1, 12)
	evxor		rW0,rW0,rW4
	EAD(rD2, 2)
	evxor		rW0,rW0,rW2
	LWL(rW5, 4)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 0)
	evxor		rW3,rW3,rW7
	LWH(rW0, 12)
	evxor		rW3,rW3,rW1
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW5
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	/* second key block of this pass: 32(rKP) and 40(rKP) */
	LAH(rW1, rD3, 0, 12)
	LAL(rW0, rD2, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAH(rW4, rD3, 2, 4)
	LAL(rW4, rD0, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD1, 2, 4)
	LAH(rW7, rD2, 3, 0)
	LAL(rW7, rD3, 3, 0)
	LAL(rW3, rD1, 1, 8)
	evldw		rD1,32(rKP)
	EAD(rD0, 0)
	evxor		rW4,rW4,rW6
	LWL(rW1, 12)
	evxor		rW0,rW0,rW4
	EAD(rD2, 2)
	evxor		rW0,rW0,rW2
	LWL(rW5, 4)
	evxor		rD1,rD1,rW0
	evldw		rD3,40(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 0)
	evxor		rW3,rW3,rW7
	LWH(rW0, 12)
	evxor		rW3,rW3,rW1
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW5
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	addi		rKP,rKP,32
	bdnz		ppc_decrypt_block_loop
	/* after the loop: one more word-table step with 16(rKP)/24(rKP) ... */
	LAH(rW1, rD3, 0, 12)
	LAL(rW0, rD2, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAH(rW4, rD3, 2, 4)
	LAL(rW4, rD0, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD1, 2, 4)
	LAH(rW7, rD2, 3, 0)
	LAL(rW7, rD3, 3, 0)
	LAL(rW3, rD1, 1, 8)
	evldw		rD1,16(rKP)
	EAD(rD0, 0)
	evxor		rW4,rW4,rW6
	LWL(rW1, 12)
	evxor		rW0,rW0,rW4
	EAD(rD2, 2)
	evxor		rW0,rW0,rW2
	LWL(rW5, 4)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	/* ... interleaved with the first single byte lookups through rT1 */
	DAD(rD1, 0)
	evxor		rW3,rW3,rW7
	LBD(rW0)
	evxor		rW3,rW3,rW1
	DAD(rD0, 1)
	evxor		rD3,rD3,rW3
	LBD(rW6)
	evxor		rD3,rD3,rW5
	DAD(rD0, 0)
	evmergehi	rD2,rD2,rD3
	LBD(rW3)
	/*
	 * Byte-wise step: each output word rW0-rW3 is assembled from four
	 * single byte lookups, merged into place with rlwimi. Meanwhile
	 * rD0-rD3 are reloaded from 32..44(rKP) for the caller's final xor.
	 */
	LAD(rW2, rD3, 0)
	LAD(rW1, rD2, 0)
	LAD(rW4, rD2, 1)
	LAD(rW5, rD3, 1)
	LAD(rW7, rD1, 1)
	rlwimi		rW0,rW4,8,16,23
	rlwimi		rW1,rW5,8,16,23
	LAD(rW4, rD3, 2)
	LAD(rW5, rD0, 2)
	rlwimi		rW2,rW6,8,16,23
	rlwimi		rW3,rW7,8,16,23
	LAD(rW6, rD1, 2)
	LAD(rW7, rD2, 2)
	rlwimi		rW0,rW4,16,8,15
	rlwimi		rW1,rW5,16,8,15
	LAD(rW4, rD0, 3)
	LAD(rW5, rD1, 3)
	rlwimi		rW2,rW6,16,8,15
	lwz		rD0,32(rKP)
	rlwimi		rW3,rW7,16,8,15
	lwz		rD1,36(rKP)
	LAD(rW6, rD2, 3)
	LAD(rW7, rD3, 3)
	rlwimi		rW0,rW4,24,0,7
	lwz		rD2,40(rKP)
	rlwimi		rW1,rW5,24,0,7
	lwz		rD3,44(rKP)
	rlwimi		rW2,rW6,24,0,7
	rlwimi		rW3,rW7,24,0,7
	blr