/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Fast AES implementation for SPE instruction set (PPC)
 *
 * This code makes use of the SPE SIMD instruction set as defined in
 * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
 * Implementation is based on optimization guide notes from
 * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 */

#include <asm/ppc_asm.h>
#include "aes-spe-regs.h"

/*
 * Every primitive below ends in ';' so that the composite macros can place
 * several instructions on a single preprocessed line.
 */

/*
 * EAD(in, bpos): take byte number bpos of register in (0 = least significant
 * byte, 3 = most significant byte), shift it left by 4 and insert it into
 * bits 20..27 (mask 0x00000ff0) of rT0. All other bits of rT0 are preserved.
 * NOTE(review): presumably rT0 holds the base of a 4 KiB aligned table with
 * 16 byte entries, so the result is the address of entry [byte] - confirm
 * against the table definition.
 */
#define EAD(in, bpos) \
	rlwimi		rT0,in,28-((bpos+3)%4)*8,20,27;

/*
 * DAD(in, bpos): same byte selection as EAD, but the byte is inserted
 * unscaled into bits 24..31 (mask 0x000000ff) of rT1.
 * NOTE(review): presumably rT1 holds the base of a 256 byte aligned byte
 * table used for decryption - confirm against the table definition.
 */
#define DAD(in, bpos) \
	rlwimi		rT1,in,24-((bpos+3)%4)*8,24,31;

/*
 * LWH(out, off): load the word at off(rT0) with evlwwsplat.
 * NOTE(review): per the SPE PIM evlwwsplat writes the word to both halves
 * of the 64 bit register; a following LWL then replaces the low half.
 */
#define LWH(out, off) \
	evlwwsplat	out,off(rT0);	/* load word high		*/

/* LWL(out, off): load the word at off(rT0) with a plain 32 bit lwz. */
#define LWL(out, off) \
	lwz		out,off(rT0);	/* load word low		*/

/* LBZ(out, tab, off): load the single byte at off(tab), zero extended. */
#define LBZ(out, tab, off) \
	lbz		out,off(tab);	/* load byte			*/

/* LAH(out, in, bpos, off): EAD address calculation followed by LWH. */
#define LAH(out, in, bpos, off) \
	EAD(in, bpos)			/* calc addr + load word high	*/ \
	LWH(out, off)

/* LAL(out, in, bpos, off): EAD address calculation followed by LWL. */
#define LAL(out, in, bpos, off) \
	EAD(in, bpos)			/* calc addr + load word low	*/ \
	LWL(out, off)

/*
 * LAE(out, in, bpos): EAD address calculation, then load the byte stored at
 * offset 8 of the rT0 table entry (used by the last encryption round).
 */
#define LAE(out, in, bpos) \
	EAD(in, bpos)			/* calc addr + load enc byte	*/ \
	LBZ(out, rT0, 8)

/* LBE(out): like LAE but reuses the address already present in rT0. */
#define LBE(out) \
	LBZ(out, rT0, 8)		/* load enc byte		*/

/*
 * LAD(out, in, bpos): DAD address calculation, then load the byte at 0(rT1)
 * (used by the last decryption round).
 */
#define LAD(out, in, bpos) \
	DAD(in, bpos)			/* calc addr + load dec byte	*/ \
	LBZ(out, rT1, 0)

/* LBD(out): like LAD but reuses the address already present in rT1. */
#define LBD(out) \
	LBZ(out, rT1, 0)		/* load dec byte		*/

/*
 * ppc_encrypt_block: The central encryption function for a single 16 bytes
 * block. It does no stack handling or register saving to support fast calls
 * via bl/blr. It expects that caller has pre-xored input data with first
 * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
 * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
 * and rW0-rW3 and caller must execute a final xor on the output registers.
 * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
 *
 * Structure: the loop body contains two unrolled table rounds that consume
 * the key material at 16(rKP)..47(rKP) and then advance rKP by 32. After
 * the loop one more table round (key at 16/24(rKP)) and one byte-lookup
 * round follow, so CTR = n gives 2n + 2 rounds in total. On return rW0-rW3
 * hold the four assembled words of looked-up bytes and rD0-rD3 the words
 * loaded from 32(rKP)..44(rKP).
 * NOTE(review): 2n + 2 matches 10/12/14 AES rounds for CTR = 4/5/6 - confirm
 * against the callers.
 */
_GLOBAL(ppc_encrypt_block)
	/* first three lookups of a round; re-issued at the end of each round */
	LAH(rW4, rD1, 2, 4)
	LAH(rW6, rD0, 3, 0)
	LAH(rW3, rD0, 1, 8)
ppc_encrypt_block_loop:
	/* table round, key words at 16(rKP) and 24(rKP) */
	LAH(rW0, rD3, 0, 12)
	LAL(rW0, rD0, 0, 12)
	LAH(rW1, rD1, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAL(rW3, rD1, 1, 8)
	LAL(rW4, rD2, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD3, 2, 4)
	LAL(rW5, rD0, 2, 4)
	LAH(rW7, rD2, 3, 0)
	evldw		rD1,16(rKP)
	EAD(rD3, 3)
	evxor		rW2,rW2,rW4
	LWL(rW7, 0)
	evxor		rW2,rW2,rW6
	EAD(rD2, 0)
	evxor		rD1,rD1,rW2
	LWL(rW1, 12)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 2)
	evxor		rW3,rW3,rW5
	LWH(rW4, 4)
	evxor		rW3,rW3,rW7
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW1
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	/* table round, key words at 32(rKP) and 40(rKP) */
	LAH(rW0, rD3, 0, 12)
	LAL(rW0, rD0, 0, 12)
	LAH(rW1, rD1, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAL(rW3, rD1, 1, 8)
	LAL(rW4, rD2, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD3, 2, 4)
	LAL(rW5, rD0, 2, 4)
	LAH(rW7, rD2, 3, 0)
	evldw		rD1,32(rKP)
	EAD(rD3, 3)
	evxor		rW2,rW2,rW4
	LWL(rW7, 0)
	evxor		rW2,rW2,rW6
	EAD(rD2, 0)
	evxor		rD1,rD1,rW2
	LWL(rW1, 12)
	evxor		rD1,rD1,rW0
	evldw		rD3,40(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 2)
	evxor		rW3,rW3,rW5
	LWH(rW4, 4)
	evxor		rW3,rW3,rW7
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW1
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	addi		rKP,rKP,32	/* two rounds of key consumed */
	bdnz		ppc_encrypt_block_loop
	/* last table round; its tail already starts the byte lookups (LBE) */
	LAH(rW0, rD3, 0, 12)
	LAL(rW0, rD0, 0, 12)
	LAH(rW1, rD1, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAL(rW3, rD1, 1, 8)
	LAL(rW4, rD2, 2, 4)
	LAH(rW5, rD3, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAL(rW5, rD0, 2, 4)
	LAH(rW7, rD2, 3, 0)
	evldw		rD1,16(rKP)
	EAD(rD3, 3)
	evxor		rW2,rW2,rW4
	LWL(rW7, 0)
	evxor		rW2,rW2,rW6
	EAD(rD2, 0)
	evxor		rD1,rD1,rW2
	LWL(rW1, 12)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 0)
	evxor		rW3,rW3,rW5
	LBE(rW2)
	evxor		rW3,rW3,rW7
	EAD(rD0, 1)
	evxor		rD3,rD3,rW3
	LBE(rW6)
	evxor		rD3,rD3,rW1
	EAD(rD0, 0)
	evmergehi	rD2,rD2,rD3
	LBE(rW1)
	/*
	 * final round: one byte lookup per state byte; the rlwimi merges
	 * place the bytes at bits 16..23, 8..15 and 0..7 of rW0-rW3 while
	 * rD0-rD3 are reloaded from 32(rKP)..44(rKP) for the caller's xor
	 */
	LAE(rW0, rD3, 0)
	LAE(rW1, rD0, 0)
	LAE(rW4, rD2, 1)
	LAE(rW5, rD3, 1)
	LAE(rW3, rD2, 0)
	LAE(rW7, rD1, 1)
	rlwimi		rW0,rW4,8,16,23
	rlwimi		rW1,rW5,8,16,23
	LAE(rW4, rD1, 2)
	LAE(rW5, rD2, 2)
	rlwimi		rW2,rW6,8,16,23
	rlwimi		rW3,rW7,8,16,23
	LAE(rW6, rD3, 2)
	LAE(rW7, rD0, 2)
	rlwimi		rW0,rW4,16,8,15
	rlwimi		rW1,rW5,16,8,15
	LAE(rW4, rD0, 3)
	LAE(rW5, rD1, 3)
	rlwimi		rW2,rW6,16,8,15
	lwz		rD0,32(rKP)
	rlwimi		rW3,rW7,16,8,15
	lwz		rD1,36(rKP)
	LAE(rW6, rD2, 3)
	LAE(rW7, rD3, 3)
	rlwimi		rW0,rW4,24,0,7
	lwz		rD2,40(rKP)
	rlwimi		rW1,rW5,24,0,7
	lwz		rD3,44(rKP)
	rlwimi		rW2,rW6,24,0,7
	rlwimi		rW3,rW7,24,0,7
	blr

/*
 * ppc_decrypt_block: The central decryption function for a single 16 bytes
 * block. It does no stack handling or register saving to support fast calls
 * via bl/blr. It expects that caller has pre-xored input data with first
 * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
 * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
 * and rW0-rW3 and caller must execute a final xor on the output registers.
 * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
 *
 * The round structure mirrors ppc_encrypt_block: two unrolled table rounds
 * per loop iteration (key at 16(rKP)..47(rKP), rKP += 32), then one more
 * table round and one byte-lookup round after the loop.
 * NOTE(review): the final round addresses its byte table through rT1
 * (DAD/LAD/LBD only replace the low 8 bits of rT1), so the caller must have
 * set up rT1 as well although it is not listed above - confirm against the
 * callers. "encryption key" above presumably means the key schedule that
 * rKP points to for decryption - confirm.
 */
_GLOBAL(ppc_decrypt_block)
	/* first three lookups of a round; re-issued at the end of each round */
	LAH(rW0, rD1, 0, 12)
	LAH(rW6, rD0, 3, 0)
	LAH(rW3, rD0, 1, 8)
ppc_decrypt_block_loop:
	/* table round, key words at 16(rKP) and 24(rKP) */
	LAH(rW1, rD3, 0, 12)
	LAL(rW0, rD2, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAH(rW4, rD3, 2, 4)
	LAL(rW4, rD0, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD1, 2, 4)
	LAH(rW7, rD2, 3, 0)
	LAL(rW7, rD3, 3, 0)
	LAL(rW3, rD1, 1, 8)
	evldw		rD1,16(rKP)
	EAD(rD0, 0)
	evxor		rW4,rW4,rW6
	LWL(rW1, 12)
	evxor		rW0,rW0,rW4
	EAD(rD2, 2)
	evxor		rW0,rW0,rW2
	LWL(rW5, 4)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 0)
	evxor		rW3,rW3,rW7
	LWH(rW0, 12)
	evxor		rW3,rW3,rW1
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW5
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	/* table round, key words at 32(rKP) and 40(rKP) */
	LAH(rW1, rD3, 0, 12)
	LAL(rW0, rD2, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAH(rW4, rD3, 2, 4)
	LAL(rW4, rD0, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD1, 2, 4)
	LAH(rW7, rD2, 3, 0)
	LAL(rW7, rD3, 3, 0)
	LAL(rW3, rD1, 1, 8)
	evldw		rD1,32(rKP)
	EAD(rD0, 0)
	evxor		rW4,rW4,rW6
	LWL(rW1, 12)
	evxor		rW0,rW0,rW4
	EAD(rD2, 2)
	evxor		rW0,rW0,rW2
	LWL(rW5, 4)
	evxor		rD1,rD1,rW0
	evldw		rD3,40(rKP)
	evmergehi	rD0,rD0,rD1
	EAD(rD1, 0)
	evxor		rW3,rW3,rW7
	LWH(rW0, 12)
	evxor		rW3,rW3,rW1
	EAD(rD0, 3)
	evxor		rD3,rD3,rW3
	LWH(rW6, 0)
	evxor		rD3,rD3,rW5
	EAD(rD0, 1)
	evmergehi	rD2,rD2,rD3
	LWH(rW3, 8)
	addi		rKP,rKP,32	/* two rounds of key consumed */
	bdnz		ppc_decrypt_block_loop
	/* last table round; its tail already starts the byte lookups (LBD) */
	LAH(rW1, rD3, 0, 12)
	LAL(rW0, rD2, 0, 12)
	LAH(rW2, rD2, 1, 8)
	LAL(rW2, rD3, 1, 8)
	LAH(rW4, rD3, 2, 4)
	LAL(rW4, rD0, 2, 4)
	LAL(rW6, rD1, 3, 0)
	LAH(rW5, rD1, 2, 4)
	LAH(rW7, rD2, 3, 0)
	LAL(rW7, rD3, 3, 0)
	LAL(rW3, rD1, 1, 8)
	evldw		rD1,16(rKP)
	EAD(rD0, 0)
	evxor		rW4,rW4,rW6
	LWL(rW1, 12)
	evxor		rW0,rW0,rW4
	EAD(rD2, 2)
	evxor		rW0,rW0,rW2
	LWL(rW5, 4)
	evxor		rD1,rD1,rW0
	evldw		rD3,24(rKP)
	evmergehi	rD0,rD0,rD1
	DAD(rD1, 0)
	evxor		rW3,rW3,rW7
	LBD(rW0)
	evxor		rW3,rW3,rW1
	DAD(rD0, 1)
	evxor		rD3,rD3,rW3
	LBD(rW6)
	evxor		rD3,rD3,rW5
	DAD(rD0, 0)
	evmergehi	rD2,rD2,rD3
	LBD(rW3)
	/*
	 * final round: one byte lookup via rT1 per state byte; the rlwimi
	 * merges place the bytes at bits 16..23, 8..15 and 0..7 of rW0-rW3
	 * while rD0-rD3 are reloaded from 32(rKP)..44(rKP) for the caller's
	 * xor
	 */
	LAD(rW2, rD3, 0)
	LAD(rW1, rD2, 0)
	LAD(rW4, rD2, 1)
	LAD(rW5, rD3, 1)
	LAD(rW7, rD1, 1)
	rlwimi		rW0,rW4,8,16,23
	rlwimi		rW1,rW5,8,16,23
	LAD(rW4, rD3, 2)
	LAD(rW5, rD0, 2)
	rlwimi		rW2,rW6,8,16,23
	rlwimi		rW3,rW7,8,16,23
	LAD(rW6, rD1, 2)
	LAD(rW7, rD2, 2)
	rlwimi		rW0,rW4,16,8,15
	rlwimi		rW1,rW5,16,8,15
	LAD(rW4, rD0, 3)
	LAD(rW5, rD1, 3)
	rlwimi		rW2,rW6,16,8,15
	lwz		rD0,32(rKP)
	rlwimi		rW3,rW7,16,8,15
	lwz		rD1,36(rKP)
	LAD(rW6, rD2, 3)
	LAD(rW7, rD3, 3)
	rlwimi		rW0,rW4,24,0,7
	lwz		rD2,40(rKP)
	rlwimi		rW1,rW5,24,0,7
	lwz		rD3,44(rKP)
	rlwimi		rW2,rW6,24,0,7
	rlwimi		rW3,rW7,24,0,7
	blr