/* SPDX-License-Identifier: GPL-2.0-or-later */
#
# Accelerated chacha20 implementation for ppc64le.
#
# Copyright 2023- IBM Corp. All rights reserved
#
#===================================================================================
# Written by Danny Tsen <dtsen@us.ibm.com>
#
# chacha_p10le_8x(u32 *state, byte *dst, const byte *src,
#				 size_t len, int nrounds);
#
# do rounds,  8 quarter rounds
# 1.  a += b; d ^= a; d <<<= 16;
# 2.  c += d; b ^= c; b <<<= 12;
# 3.  a += b; d ^= a; d <<<= 8;
# 4.  c += d; b ^= c; b <<<= 7
#
# row1 = (row1 + row2),  row4 = row1 xor row4,  row4 rotate each word by 16
# row3 = (row3 + row4),  row2 = row3 xor row2,  row2 rotate each word by 12
# row1 = (row1 + row2),  row4 = row1 xor row4,  row4 rotate each word by 8
# row3 = (row3 + row4),  row2 = row3 xor row2,  row2 rotate each word by 7
#
# 4 blocks (a b c d)
#
# a0 b0 c0 d0
# a1 b1 c1 d1
# ...
# a4 b4 c4 d4
# ...
# a8 b8 c8 d8
# ...
# a12 b12 c12 d12
# a13 ...
# a14 ...
# a15 b15 c15 d15
#
# Column round (v0, v4,  v8, v12, v1, v5,  v9, v13, v2, v6, v10, v14, v3, v7, v11, v15)
# Diagonal round (v0, v5, v10, v15, v1, v6, v11, v12, v2, v7,  v8, v13, v3, v4,  v9, v14)
#

#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/asm-compat.h>
#include <linux/linkage.h>

.machine "any"
.text

# Store the 64-bit register \GPR at \OFFSET(\FRAME).
.macro SAVE_GPR GPR OFFSET FRAME
	std \GPR,\OFFSET(\FRAME)
.endm

# Store vector register \VRS at \FRAME + \OFFSET.
# The offset is materialised in r16, so r16 is clobbered.
.macro SAVE_VRS VRS OFFSET FRAME
	li 16, \OFFSET
	stvx \VRS, 16, \FRAME
.endm

# Store VSX register \VSX at \FRAME + \OFFSET.
# The offset is materialised in r16, so r16 is clobbered.
.macro SAVE_VSX VSX OFFSET FRAME
	li 16, \OFFSET
	stxvx \VSX, 16, \FRAME
.endm

# Reload the 64-bit register \GPR from \OFFSET(\FRAME).
.macro RESTORE_GPR GPR OFFSET FRAME
	ld \GPR,\OFFSET(\FRAME)
.endm

# Reload vector register \VRS from \FRAME + \OFFSET.  Clobbers r16.
.macro RESTORE_VRS VRS OFFSET FRAME
	li 16, \OFFSET
	lvx \VRS, 16, \FRAME
.endm

# Reload VSX register \VSX from \FRAME + \OFFSET.  Clobbers r16.
.macro RESTORE_VSX VSX OFFSET FRAME
	li 16, \OFFSET
	lxvx \VSX, 16, \FRAME
.endm

# Prologue: save LR at 16(r1), push a 752-byte stack frame, then spill
#   r14 - r31	at r1 + 112 .. r1 + 248
#   v20 - v31	at r9 + 0   .. r9 + 176	(r9 = r1 + 256)
#   vs14 - vs31	at r9 + 192 .. r9 + 464
# Clobbers r0, r9 and r16.  r16 is spilled by SAVE_GPR before the vector
# spills start using it as an index register.
.macro SAVE_REGS
	mflr 0
	std 0, 16(1)
	stdu 1,-752(1)

	SAVE_GPR 14, 112, 1
	SAVE_GPR 15, 120, 1
	SAVE_GPR 16, 128, 1
	SAVE_GPR 17, 136, 1
	SAVE_GPR 18, 144, 1
	SAVE_GPR 19, 152, 1
	SAVE_GPR 20, 160, 1
	SAVE_GPR 21, 168, 1
	SAVE_GPR 22, 176, 1
	SAVE_GPR 23, 184, 1
	SAVE_GPR 24, 192, 1
	SAVE_GPR 25, 200, 1
	SAVE_GPR 26, 208, 1
	SAVE_GPR 27, 216, 1
	SAVE_GPR 28, 224, 1
	SAVE_GPR 29, 232, 1
	SAVE_GPR 30, 240, 1
	SAVE_GPR 31, 248, 1

	addi 9, 1, 256
	SAVE_VRS 20, 0, 9
	SAVE_VRS 21, 16, 9
	SAVE_VRS 22, 32, 9
	SAVE_VRS 23, 48, 9
	SAVE_VRS 24, 64, 9
	SAVE_VRS 25, 80, 9
	SAVE_VRS 26, 96, 9
	SAVE_VRS 27, 112, 9
	SAVE_VRS 28, 128, 9
	SAVE_VRS 29, 144, 9
	SAVE_VRS 30, 160, 9
	SAVE_VRS 31, 176, 9

	SAVE_VSX 14, 192, 9
	SAVE_VSX 15, 208, 9
	SAVE_VSX 16, 224, 9
	SAVE_VSX 17, 240, 9
	SAVE_VSX 18, 256, 9
	SAVE_VSX 19, 272, 9
	SAVE_VSX 20, 288, 9
	SAVE_VSX 21, 304, 9
	SAVE_VSX 22, 320, 9
	SAVE_VSX 23, 336, 9
	SAVE_VSX 24, 352, 9
	SAVE_VSX 25, 368, 9
	SAVE_VSX 26, 384, 9
	SAVE_VSX 27, 400, 9
	SAVE_VSX 28, 416, 9
	SAVE_VSX 29, 432, 9
	SAVE_VSX 30, 448, 9
	SAVE_VSX 31, 464, 9
.endm # SAVE_REGS

# Epilogue: mirror of SAVE_REGS.  Vector and VSX registers are reloaded
# first (r16 is the index register), then r14 - r31 (which also restores
# r16), then the 752-byte frame is popped and LR is reloaded from 16(r1).
# Clobbers r0 and r9.
.macro RESTORE_REGS
	addi 9, 1, 256
	RESTORE_VRS 20, 0, 9
	RESTORE_VRS 21, 16, 9
	RESTORE_VRS 22, 32, 9
	RESTORE_VRS 23, 48, 9
	RESTORE_VRS 24, 64, 9
	RESTORE_VRS 25, 80, 9
	RESTORE_VRS 26, 96, 9
	RESTORE_VRS 27, 112, 9
	RESTORE_VRS 28, 128, 9
	RESTORE_VRS 29, 144, 9
	RESTORE_VRS 30, 160, 9
	RESTORE_VRS 31, 176, 9

	RESTORE_VSX 14, 192, 9
	RESTORE_VSX 15, 208, 9
	RESTORE_VSX 16, 224, 9
	RESTORE_VSX 17, 240, 9
	RESTORE_VSX 18, 256, 9
	RESTORE_VSX 19, 272, 9
	RESTORE_VSX 20, 288, 9
	RESTORE_VSX 21, 304, 9
	RESTORE_VSX 22, 320, 9
	RESTORE_VSX 23, 336, 9
	RESTORE_VSX 24, 352, 9
	RESTORE_VSX 25, 368, 9
	RESTORE_VSX 26, 384, 9
	RESTORE_VSX 27, 400, 9
	RESTORE_VSX 28, 416, 9
	RESTORE_VSX 29, 432, 9
	RESTORE_VSX 30, 448, 9
	RESTORE_VSX 31, 464, 9

	RESTORE_GPR 14, 112, 1
	RESTORE_GPR 15, 120, 1
	RESTORE_GPR 16, 128, 1
	RESTORE_GPR 17, 136, 1
	RESTORE_GPR 18, 144, 1
	RESTORE_GPR 19, 152, 1
	RESTORE_GPR 20, 160, 1
	RESTORE_GPR 21, 168, 1
	RESTORE_GPR 22, 176, 1
	RESTORE_GPR 23, 184, 1
	RESTORE_GPR 24, 192, 1
	RESTORE_GPR 25, 200, 1
	RESTORE_GPR 26, 208, 1
	RESTORE_GPR 27, 216, 1
	RESTORE_GPR 28, 224, 1
	RESTORE_GPR 29, 232, 1
	RESTORE_GPR 30, 240, 1
	RESTORE_GPR 31, 248, 1

	addi 1, 1, 752
	ld 0, 16(1)
	mtlr 0
.endm # RESTORE_REGS

# One double round (column round, then diagonal round) applied to two
# independent sets of working state at once: v0 - v15 and v16 - v31.
#
# All 32 vector registers carry state, so the per-step constants live in
# vs20 - vs23 and are swapped in on demand: v25 (or v28) is parked in
# vs0, overwritten with the constant for one step, then recovered from
# vs0 before it is needed as state again.
#   vs20, vs22 - vpermxor selectors for the "<<<= 16" and "<<<= 8" steps
#   vs21, vs23 - vrlw rotate counts for the "<<<= 12" and "<<<= 7" steps
# Clobbers vs0.
.macro QT_loop_8x
	# QR(v0, v4,  v8, v12, v1, v5,  v9, v13, v2, v6, v10, v14, v3, v7, v11, v15)
	xxlor 0, 32+25, 32+25
	xxlor 32+25, 20, 20
	vadduwm 0, 0, 4
	vadduwm 1, 1, 5
	vadduwm 2, 2, 6
	vadduwm 3, 3, 7
	vadduwm 16, 16, 20
	vadduwm 17, 17, 21
	vadduwm 18, 18, 22
	vadduwm 19, 19, 23

	vpermxor 12, 12, 0, 25
	vpermxor 13, 13, 1, 25
	vpermxor 14, 14, 2, 25
	vpermxor 15, 15, 3, 25
	vpermxor 28, 28, 16, 25
	vpermxor 29, 29, 17, 25
	vpermxor 30, 30, 18, 25
	vpermxor 31, 31, 19, 25
	xxlor 32+25, 0, 0
	vadduwm 8, 8, 12
	vadduwm 9, 9, 13
	vadduwm 10, 10, 14
	vadduwm 11, 11, 15
	vadduwm 24, 24, 28
	vadduwm 25, 25, 29
	vadduwm 26, 26, 30
	vadduwm 27, 27, 31
	vxor 4, 4, 8
	vxor 5, 5, 9
	vxor 6, 6, 10
	vxor 7, 7, 11
	vxor 20, 20, 24
	vxor 21, 21, 25
	vxor 22, 22, 26
	vxor 23, 23, 27

	xxlor 0, 32+25, 32+25
	xxlor 32+25, 21, 21
	vrlw 4, 4, 25  #
	vrlw 5, 5, 25
	vrlw 6, 6, 25
	vrlw 7, 7, 25
	vrlw 20, 20, 25  #
	vrlw 21, 21, 25
	vrlw 22, 22, 25
	vrlw 23, 23, 25
	xxlor 32+25, 0, 0
	vadduwm 0, 0, 4
	vadduwm 1, 1, 5
	vadduwm 2, 2, 6
	vadduwm 3, 3, 7
	vadduwm 16, 16, 20
	vadduwm 17, 17, 21
	vadduwm 18, 18, 22
	vadduwm 19, 19, 23

	xxlor 0, 32+25, 32+25
	xxlor 32+25, 22, 22
	vpermxor 12, 12, 0, 25
	vpermxor 13, 13, 1, 25
	vpermxor 14, 14, 2, 25
	vpermxor 15, 15, 3, 25
	vpermxor 28, 28, 16, 25
	vpermxor 29, 29, 17, 25
	vpermxor 30, 30, 18, 25
	vpermxor 31, 31, 19, 25
	xxlor 32+25, 0, 0
	vadduwm 8, 8, 12
	vadduwm 9, 9, 13
	vadduwm 10, 10, 14
	vadduwm 11, 11, 15
	vadduwm 24, 24, 28
	vadduwm 25, 25, 29
	vadduwm 26, 26, 30
	vadduwm 27, 27, 31
	xxlor 0, 32+28, 32+28
	xxlor 32+28, 23, 23
	vxor 4, 4, 8
	vxor 5, 5, 9
	vxor 6, 6, 10
	vxor 7, 7, 11
	vxor 20, 20, 24
	vxor 21, 21, 25
	vxor 22, 22, 26
	vxor 23, 23, 27
	vrlw 4, 4, 28  #
	vrlw 5, 5, 28
	vrlw 6, 6, 28
	vrlw 7, 7, 28
	vrlw 20, 20, 28  #
	vrlw 21, 21, 28
	vrlw 22, 22, 28
	vrlw 23, 23, 28
	xxlor 32+28, 0, 0

	# QR(v0, v5, v10, v15, v1, v6, v11, v12, v2, v7,  v8, v13, v3, v4,  v9, v14)
	xxlor 0, 32+25, 32+25
	xxlor 32+25, 20, 20
	vadduwm 0, 0, 5
	vadduwm 1, 1, 6
	vadduwm 2, 2, 7
	vadduwm 3, 3, 4
	vadduwm 16, 16, 21
	vadduwm 17, 17, 22
	vadduwm 18, 18, 23
	vadduwm 19, 19, 20

	vpermxor 15, 15, 0, 25
	vpermxor 12, 12, 1, 25
	vpermxor 13, 13, 2, 25
	vpermxor 14, 14, 3, 25
	vpermxor 31, 31, 16, 25
	vpermxor 28, 28, 17, 25
	vpermxor 29, 29, 18, 25
	vpermxor 30, 30, 19, 25

	xxlor 32+25, 0, 0
	vadduwm 10, 10, 15
	vadduwm 11, 11, 12
	vadduwm 8, 8, 13
	vadduwm 9, 9, 14
	vadduwm 26, 26, 31
	vadduwm 27, 27, 28
	vadduwm 24, 24, 29
	vadduwm 25, 25, 30
	vxor 5, 5, 10
	vxor 6, 6, 11
	vxor 7, 7, 8
	vxor 4, 4, 9
	vxor 21, 21, 26
	vxor 22, 22, 27
	vxor 23, 23, 24
	vxor 20, 20, 25

	xxlor 0, 32+25, 32+25
	xxlor 32+25, 21, 21
	vrlw 5, 5, 25
	vrlw 6, 6, 25
	vrlw 7, 7, 25
	vrlw 4, 4, 25
	vrlw 21, 21, 25
	vrlw 22, 22, 25
	vrlw 23, 23, 25
	vrlw 20, 20, 25
	xxlor 32+25, 0, 0

	vadduwm 0, 0, 5
	vadduwm 1, 1, 6
	vadduwm 2, 2, 7
	vadduwm 3, 3, 4
	vadduwm 16, 16, 21
	vadduwm 17, 17, 22
	vadduwm 18, 18, 23
	vadduwm 19, 19, 20

	xxlor 0, 32+25, 32+25
	xxlor 32+25, 22, 22
	vpermxor 15, 15, 0, 25
	vpermxor 12, 12, 1, 25
	vpermxor 13, 13, 2, 25
	vpermxor 14, 14, 3, 25
	vpermxor 31, 31, 16, 25
	vpermxor 28, 28, 17, 25
	vpermxor 29, 29, 18, 25
	vpermxor 30, 30, 19, 25
	xxlor 32+25, 0, 0

	vadduwm 10, 10, 15
	vadduwm 11, 11, 12
	vadduwm 8, 8, 13
	vadduwm 9, 9, 14
	vadduwm 26, 26, 31
	vadduwm 27, 27, 28
	vadduwm 24, 24, 29
	vadduwm 25, 25, 30

	xxlor 0, 32+28, 32+28
	xxlor 32+28, 23, 23
	vxor 5, 5, 10
	vxor 6, 6, 11
	vxor 7, 7, 8
	vxor 4, 4, 9
	vxor 21, 21, 26
	vxor 22, 22, 27
	vxor 23, 23, 24
	vxor 20, 20, 25
	vrlw 5, 5, 28
	vrlw 6, 6, 28
	vrlw 7, 7, 28
	vrlw 4, 4, 28
	vrlw 21, 21, 28
	vrlw 22, 22, 28
	vrlw 23, 23, 28
	vrlw 20, 20, 28
	xxlor 32+28, 0, 0
.endm

# One double round (column round, then diagonal round) on a single set
# of working state in v0 - v15.  The constants are read directly from
# vector registers, no swapping needed:
#   v20, v22 - vpermxor selectors for the "<<<= 16" and "<<<= 8" steps
#   v21, v23 - vrlw rotate counts for the "<<<= 12" and "<<<= 7" steps
.macro QT_loop_4x
	# QR(v0, v4,  v8, v12, v1, v5,  v9, v13, v2, v6, v10, v14, v3, v7, v11, v15)
	vadduwm 0, 0, 4
	vadduwm 1, 1, 5
	vadduwm 2, 2, 6
	vadduwm 3, 3, 7
	vpermxor 12, 12, 0, 20
	vpermxor 13, 13, 1, 20
	vpermxor 14, 14, 2, 20
	vpermxor 15, 15, 3, 20
	vadduwm 8, 8, 12
	vadduwm 9, 9, 13
	vadduwm 10, 10, 14
	vadduwm 11, 11, 15
	vxor 4, 4, 8
	vxor 5, 5, 9
	vxor 6, 6, 10
	vxor 7, 7, 11
	vrlw 4, 4, 21
	vrlw 5, 5, 21
	vrlw 6, 6, 21
	vrlw 7, 7, 21
	vadduwm 0, 0, 4
	vadduwm 1, 1, 5
	vadduwm 2, 2, 6
	vadduwm 3, 3, 7
	vpermxor 12, 12, 0, 22
	vpermxor 13, 13, 1, 22
	vpermxor 14, 14, 2, 22
	vpermxor 15, 15, 3, 22
	vadduwm 8, 8, 12
	vadduwm 9, 9, 13
	vadduwm 10, 10, 14
	vadduwm 11, 11, 15
	vxor 4, 4, 8
	vxor 5, 5, 9
	vxor 6, 6, 10
	vxor 7, 7, 11
	vrlw 4, 4, 23
	vrlw 5, 5, 23
	vrlw 6, 6, 23
	vrlw 7, 7, 23

	# QR(v0, v5, v10, v15, v1, v6, v11, v12, v2, v7,  v8, v13, v3, v4,  v9, v14)
	vadduwm 0, 0, 5
	vadduwm 1, 1, 6
	vadduwm 2, 2, 7
	vadduwm 3, 3, 4
	vpermxor 15, 15, 0, 20
	vpermxor 12, 12, 1, 20
	vpermxor 13, 13, 2, 20
	vpermxor 14, 14, 3, 20
	vadduwm 10, 10, 15
	vadduwm 11, 11, 12
	vadduwm 8, 8, 13
	vadduwm 9, 9, 14
	vxor 5, 5, 10
	vxor 6, 6, 11
	vxor 7, 7, 8
	vxor 4, 4, 9
	vrlw 5, 5, 21
	vrlw 6, 6, 21
	vrlw 7, 7, 21
	vrlw 4, 4, 21
	vadduwm 0, 0, 5
	vadduwm 1, 1, 6
	vadduwm 2, 2, 7
	vadduwm 3, 3, 4
	vpermxor 15, 15, 0, 22
	vpermxor 12, 12, 1, 22
	vpermxor 13, 13, 2, 22
	vpermxor 14, 14, 3, 22
	vadduwm 10, 10, 15
	vadduwm 11, 11, 12
	vadduwm 8, 8, 13
	vadduwm 9, 9, 14
	vxor 5, 5, 10
	vxor 6, 6, 11
	vxor 7, 7, 8
	vxor 4, 4, 9
	vrlw 5, 5, 23
	vrlw 6, 6, 23
	vrlw 7, 7, 23
	vrlw 4, 4, 23
.endm

# Transpose
# In-place 4x4 word transpose of vector registers v\a0 .. v\a3
# (addressed through their VSX aliases vs32+n).  Clobbers vs10 - vs13.
.macro TP_4x a0 a1 a2 a3
	xxmrghw 10, 32+\a0, 32+\a1	# a0, a1, b0, b1
	xxmrghw 11, 32+\a2, 32+\a3	# a2, a3, b2, b3
	xxmrglw 12, 32+\a0, 32+\a1	# c0, c1, d0, d1
	xxmrglw 13, 32+\a2, 32+\a3	# c2, c3, d2, d3
	xxpermdi 32+\a0, 10, 11, 0	# a0, a1, a2, a3
	xxpermdi 32+\a1, 10, 11, 3	# b0, b1, b2, b3
	xxpermdi 32+\a2, 12, 13, 0	# c0, c1, c2, c3
	xxpermdi 32+\a3, 12, 13, 3	# d0, d1, d2, d3
.endm

# key stream = working state + state
# \S is the first register of the working-state set (0 or 16).  The four
# input-state rows are read from v(16-\S) .. v(19-\S): v16 - v19 when
# \S = 0, v0 - v3 when \S = 16, so the caller must place the saved state
# in those registers beforehand.
.macro Add_state S
	vadduwm \S+0, \S+0, 16-\S
	vadduwm \S+4, \S+4, 17-\S
	vadduwm \S+8, \S+8, 18-\S
	vadduwm \S+12, \S+12, 19-\S

	vadduwm \S+1, \S+1, 16-\S
	vadduwm \S+5, \S+5, 17-\S
	vadduwm \S+9, \S+9, 18-\S
	vadduwm \S+13, \S+13, 19-\S

	vadduwm \S+2, \S+2, 16-\S
	vadduwm \S+6, \S+6, 17-\S
	vadduwm \S+10, \S+10, 18-\S
	vadduwm \S+14, \S+14, 19-\S

	vadduwm \S+3, \S+3, 16-\S
	vadduwm \S+7, \S+7, 17-\S
	vadduwm \S+11, \S+11, 18-\S
	vadduwm \S+15, \S+15, 19-\S
.endm

#
# write 256 bytes
#
# XOR 256 bytes of input with the key stream and store the result.
#   \S        - first register of the key-stream set (0 or 16)
#   r14       - running byte offset into both buffers
#   r5, r4    - base of the input / output buffer (src and dst in the
#               prototype's argument order)
#   r17 - r31 - byte offsets 16, 32, ... 240, set up by the caller
# Each 64-byte block is made of registers \S+k, \S+4+k, \S+8+k, \S+12+k
# (k = 0..3), which is why the XOR/store order below strides by four.
# Clobbers r9, r16 and vs0 - vs15.
.macro Write_256 S
	add 9, 14, 5
	add 16, 14, 4
	lxvw4x 0, 0, 9
	lxvw4x 1, 17, 9
	lxvw4x 2, 18, 9
	lxvw4x 3, 19, 9
	lxvw4x 4, 20, 9
	lxvw4x 5, 21, 9
	lxvw4x 6, 22, 9
	lxvw4x 7, 23, 9
	lxvw4x 8, 24, 9
	lxvw4x 9, 25, 9
	lxvw4x 10, 26, 9
	lxvw4x 11, 27, 9
	lxvw4x 12, 28, 9
	lxvw4x 13, 29, 9
	lxvw4x 14, 30, 9
	lxvw4x 15, 31, 9

	xxlxor \S+32, \S+32, 0
	xxlxor \S+36, \S+36, 1
	xxlxor \S+40, \S+40, 2
	xxlxor \S+44, \S+44, 3
	xxlxor \S+33, \S+33, 4
	xxlxor \S+37, \S+37, 5
	xxlxor \S+41, \S+41, 6
	xxlxor \S+45, \S+45, 7
	xxlxor \S+34, \S+34, 8
	xxlxor \S+38, \S+38, 9
	xxlxor \S+42, \S+42, 10
	xxlxor \S+46, \S+46, 11
	xxlxor \S+35, \S+35, 12
	xxlxor \S+39, \S+39, 13
	xxlxor \S+43, \S+43, 14
	xxlxor \S+47, \S+47, 15

	stxvw4x \S+32, 0, 16
	stxvw4x \S+36, 17, 16
	stxvw4x \S+40, 18, 16
	stxvw4x \S+44, 19, 16

	stxvw4x \S+33, 20, 16
	stxvw4x \S+37, 21, 16
	stxvw4x \S+41, 22, 16
	stxvw4x \S+45, 23, 16

	stxvw4x \S+34, 24, 16
	stxvw4x \S+38, 25, 16
	stxvw4x \S+42, 26, 16
	stxvw4x \S+46, 27, 16

	stxvw4x \S+35, 28, 16
	stxvw4x \S+39, 29, 16
	stxvw4x \S+43, 30, 16
	stxvw4x \S+47, 31, 16

.endm

#
# chacha_p10le_8x(u32 *state, byte *dst, const byte *src, size_t len, int nrounds);
#
SYM_FUNC_START(chacha_p10le_8x)
.align 5
	cmpdi 6, 0
	ble Out_no_chacha

	SAVE_REGS

	# r17 - r31 mainly for Write_256 macro.
58862306a36Sopenharmony_ci li 17, 16 58962306a36Sopenharmony_ci li 18, 32 59062306a36Sopenharmony_ci li 19, 48 59162306a36Sopenharmony_ci li 20, 64 59262306a36Sopenharmony_ci li 21, 80 59362306a36Sopenharmony_ci li 22, 96 59462306a36Sopenharmony_ci li 23, 112 59562306a36Sopenharmony_ci li 24, 128 59662306a36Sopenharmony_ci li 25, 144 59762306a36Sopenharmony_ci li 26, 160 59862306a36Sopenharmony_ci li 27, 176 59962306a36Sopenharmony_ci li 28, 192 60062306a36Sopenharmony_ci li 29, 208 60162306a36Sopenharmony_ci li 30, 224 60262306a36Sopenharmony_ci li 31, 240 60362306a36Sopenharmony_ci 60462306a36Sopenharmony_ci mr 15, 6 # len 60562306a36Sopenharmony_ci li 14, 0 # offset to inp and outp 60662306a36Sopenharmony_ci 60762306a36Sopenharmony_ci lxvw4x 48, 0, 3 # vr16, constants 60862306a36Sopenharmony_ci lxvw4x 49, 17, 3 # vr17, key 1 60962306a36Sopenharmony_ci lxvw4x 50, 18, 3 # vr18, key 2 61062306a36Sopenharmony_ci lxvw4x 51, 19, 3 # vr19, counter, nonce 61162306a36Sopenharmony_ci 61262306a36Sopenharmony_ci # create (0, 1, 2, 3) counters 61362306a36Sopenharmony_ci vspltisw 0, 0 61462306a36Sopenharmony_ci vspltisw 1, 1 61562306a36Sopenharmony_ci vspltisw 2, 2 61662306a36Sopenharmony_ci vspltisw 3, 3 61762306a36Sopenharmony_ci vmrghw 4, 0, 1 61862306a36Sopenharmony_ci vmrglw 5, 2, 3 61962306a36Sopenharmony_ci vsldoi 30, 4, 5, 8 # vr30 counter, 4 (0, 1, 2, 3) 62062306a36Sopenharmony_ci 62162306a36Sopenharmony_ci vspltisw 21, 12 62262306a36Sopenharmony_ci vspltisw 23, 7 62362306a36Sopenharmony_ci 62462306a36Sopenharmony_ci addis 11, 2, permx@toc@ha 62562306a36Sopenharmony_ci addi 11, 11, permx@toc@l 62662306a36Sopenharmony_ci lxvw4x 32+20, 0, 11 62762306a36Sopenharmony_ci lxvw4x 32+22, 17, 11 62862306a36Sopenharmony_ci 62962306a36Sopenharmony_ci sradi 8, 7, 1 63062306a36Sopenharmony_ci 63162306a36Sopenharmony_ci mtctr 8 63262306a36Sopenharmony_ci 63362306a36Sopenharmony_ci # save constants to vsx 63462306a36Sopenharmony_ci xxlor 16, 48, 48 63562306a36Sopenharmony_ci xxlor 
17, 49, 49 63662306a36Sopenharmony_ci xxlor 18, 50, 50 63762306a36Sopenharmony_ci xxlor 19, 51, 51 63862306a36Sopenharmony_ci 63962306a36Sopenharmony_ci vspltisw 25, 4 64062306a36Sopenharmony_ci vspltisw 26, 8 64162306a36Sopenharmony_ci 64262306a36Sopenharmony_ci xxlor 25, 32+26, 32+26 64362306a36Sopenharmony_ci xxlor 24, 32+25, 32+25 64462306a36Sopenharmony_ci 64562306a36Sopenharmony_ci vadduwm 31, 30, 25 # counter = (0, 1, 2, 3) + (4, 4, 4, 4) 64662306a36Sopenharmony_ci xxlor 30, 32+30, 32+30 64762306a36Sopenharmony_ci xxlor 31, 32+31, 32+31 64862306a36Sopenharmony_ci 64962306a36Sopenharmony_ci xxlor 20, 32+20, 32+20 65062306a36Sopenharmony_ci xxlor 21, 32+21, 32+21 65162306a36Sopenharmony_ci xxlor 22, 32+22, 32+22 65262306a36Sopenharmony_ci xxlor 23, 32+23, 32+23 65362306a36Sopenharmony_ci 65462306a36Sopenharmony_ci cmpdi 6, 512 65562306a36Sopenharmony_ci blt Loop_last 65662306a36Sopenharmony_ci 65762306a36Sopenharmony_ciLoop_8x: 65862306a36Sopenharmony_ci xxspltw 32+0, 16, 0 65962306a36Sopenharmony_ci xxspltw 32+1, 16, 1 66062306a36Sopenharmony_ci xxspltw 32+2, 16, 2 66162306a36Sopenharmony_ci xxspltw 32+3, 16, 3 66262306a36Sopenharmony_ci 66362306a36Sopenharmony_ci xxspltw 32+4, 17, 0 66462306a36Sopenharmony_ci xxspltw 32+5, 17, 1 66562306a36Sopenharmony_ci xxspltw 32+6, 17, 2 66662306a36Sopenharmony_ci xxspltw 32+7, 17, 3 66762306a36Sopenharmony_ci xxspltw 32+8, 18, 0 66862306a36Sopenharmony_ci xxspltw 32+9, 18, 1 66962306a36Sopenharmony_ci xxspltw 32+10, 18, 2 67062306a36Sopenharmony_ci xxspltw 32+11, 18, 3 67162306a36Sopenharmony_ci xxspltw 32+12, 19, 0 67262306a36Sopenharmony_ci xxspltw 32+13, 19, 1 67362306a36Sopenharmony_ci xxspltw 32+14, 19, 2 67462306a36Sopenharmony_ci xxspltw 32+15, 19, 3 67562306a36Sopenharmony_ci vadduwm 12, 12, 30 # increase counter 67662306a36Sopenharmony_ci 67762306a36Sopenharmony_ci xxspltw 32+16, 16, 0 67862306a36Sopenharmony_ci xxspltw 32+17, 16, 1 67962306a36Sopenharmony_ci xxspltw 32+18, 16, 2 68062306a36Sopenharmony_ci 
xxspltw 32+19, 16, 3 68162306a36Sopenharmony_ci 68262306a36Sopenharmony_ci xxspltw 32+20, 17, 0 68362306a36Sopenharmony_ci xxspltw 32+21, 17, 1 68462306a36Sopenharmony_ci xxspltw 32+22, 17, 2 68562306a36Sopenharmony_ci xxspltw 32+23, 17, 3 68662306a36Sopenharmony_ci xxspltw 32+24, 18, 0 68762306a36Sopenharmony_ci xxspltw 32+25, 18, 1 68862306a36Sopenharmony_ci xxspltw 32+26, 18, 2 68962306a36Sopenharmony_ci xxspltw 32+27, 18, 3 69062306a36Sopenharmony_ci xxspltw 32+28, 19, 0 69162306a36Sopenharmony_ci xxspltw 32+29, 19, 1 69262306a36Sopenharmony_ci vadduwm 28, 28, 31 # increase counter 69362306a36Sopenharmony_ci xxspltw 32+30, 19, 2 69462306a36Sopenharmony_ci xxspltw 32+31, 19, 3 69562306a36Sopenharmony_ci 69662306a36Sopenharmony_ci.align 5 69762306a36Sopenharmony_ciquarter_loop_8x: 69862306a36Sopenharmony_ci QT_loop_8x 69962306a36Sopenharmony_ci 70062306a36Sopenharmony_ci bdnz quarter_loop_8x 70162306a36Sopenharmony_ci 70262306a36Sopenharmony_ci xxlor 0, 32+30, 32+30 70362306a36Sopenharmony_ci xxlor 32+30, 30, 30 70462306a36Sopenharmony_ci vadduwm 12, 12, 30 70562306a36Sopenharmony_ci xxlor 32+30, 0, 0 70662306a36Sopenharmony_ci TP_4x 0, 1, 2, 3 70762306a36Sopenharmony_ci TP_4x 4, 5, 6, 7 70862306a36Sopenharmony_ci TP_4x 8, 9, 10, 11 70962306a36Sopenharmony_ci TP_4x 12, 13, 14, 15 71062306a36Sopenharmony_ci 71162306a36Sopenharmony_ci xxlor 0, 48, 48 71262306a36Sopenharmony_ci xxlor 1, 49, 49 71362306a36Sopenharmony_ci xxlor 2, 50, 50 71462306a36Sopenharmony_ci xxlor 3, 51, 51 71562306a36Sopenharmony_ci xxlor 48, 16, 16 71662306a36Sopenharmony_ci xxlor 49, 17, 17 71762306a36Sopenharmony_ci xxlor 50, 18, 18 71862306a36Sopenharmony_ci xxlor 51, 19, 19 71962306a36Sopenharmony_ci Add_state 0 72062306a36Sopenharmony_ci xxlor 48, 0, 0 72162306a36Sopenharmony_ci xxlor 49, 1, 1 72262306a36Sopenharmony_ci xxlor 50, 2, 2 72362306a36Sopenharmony_ci xxlor 51, 3, 3 72462306a36Sopenharmony_ci Write_256 0 72562306a36Sopenharmony_ci addi 14, 14, 256 # offset +=256 
72662306a36Sopenharmony_ci addi 15, 15, -256 # len -=256 72762306a36Sopenharmony_ci 72862306a36Sopenharmony_ci xxlor 5, 32+31, 32+31 72962306a36Sopenharmony_ci xxlor 32+31, 31, 31 73062306a36Sopenharmony_ci vadduwm 28, 28, 31 73162306a36Sopenharmony_ci xxlor 32+31, 5, 5 73262306a36Sopenharmony_ci TP_4x 16+0, 16+1, 16+2, 16+3 73362306a36Sopenharmony_ci TP_4x 16+4, 16+5, 16+6, 16+7 73462306a36Sopenharmony_ci TP_4x 16+8, 16+9, 16+10, 16+11 73562306a36Sopenharmony_ci TP_4x 16+12, 16+13, 16+14, 16+15 73662306a36Sopenharmony_ci 73762306a36Sopenharmony_ci xxlor 32, 16, 16 73862306a36Sopenharmony_ci xxlor 33, 17, 17 73962306a36Sopenharmony_ci xxlor 34, 18, 18 74062306a36Sopenharmony_ci xxlor 35, 19, 19 74162306a36Sopenharmony_ci Add_state 16 74262306a36Sopenharmony_ci Write_256 16 74362306a36Sopenharmony_ci addi 14, 14, 256 # offset +=256 74462306a36Sopenharmony_ci addi 15, 15, -256 # len -=256 74562306a36Sopenharmony_ci 74662306a36Sopenharmony_ci xxlor 32+24, 24, 24 74762306a36Sopenharmony_ci xxlor 32+25, 25, 25 74862306a36Sopenharmony_ci xxlor 32+30, 30, 30 74962306a36Sopenharmony_ci vadduwm 30, 30, 25 75062306a36Sopenharmony_ci vadduwm 31, 30, 24 75162306a36Sopenharmony_ci xxlor 30, 32+30, 32+30 75262306a36Sopenharmony_ci xxlor 31, 32+31, 32+31 75362306a36Sopenharmony_ci 75462306a36Sopenharmony_ci cmpdi 15, 0 75562306a36Sopenharmony_ci beq Out_loop 75662306a36Sopenharmony_ci 75762306a36Sopenharmony_ci cmpdi 15, 512 75862306a36Sopenharmony_ci blt Loop_last 75962306a36Sopenharmony_ci 76062306a36Sopenharmony_ci mtctr 8 76162306a36Sopenharmony_ci b Loop_8x 76262306a36Sopenharmony_ci 76362306a36Sopenharmony_ciLoop_last: 76462306a36Sopenharmony_ci lxvw4x 48, 0, 3 # vr16, constants 76562306a36Sopenharmony_ci lxvw4x 49, 17, 3 # vr17, key 1 76662306a36Sopenharmony_ci lxvw4x 50, 18, 3 # vr18, key 2 76762306a36Sopenharmony_ci lxvw4x 51, 19, 3 # vr19, counter, nonce 76862306a36Sopenharmony_ci 76962306a36Sopenharmony_ci vspltisw 21, 12 77062306a36Sopenharmony_ci vspltisw 23, 7
77162306a36Sopenharmony_ci addis 11, 2, permx@toc@ha 77262306a36Sopenharmony_ci addi 11, 11, permx@toc@l 77362306a36Sopenharmony_ci lxvw4x 32+20, 0, 11 77462306a36Sopenharmony_ci lxvw4x 32+22, 17, 11 77562306a36Sopenharmony_ci 77662306a36Sopenharmony_ci sradi 8, 7, 1 77762306a36Sopenharmony_ci mtctr 8 77862306a36Sopenharmony_ci 77962306a36Sopenharmony_ciLoop_4x: 78062306a36Sopenharmony_ci vspltw 0, 16, 0 78162306a36Sopenharmony_ci vspltw 1, 16, 1 78262306a36Sopenharmony_ci vspltw 2, 16, 2 78362306a36Sopenharmony_ci vspltw 3, 16, 3 78462306a36Sopenharmony_ci 78562306a36Sopenharmony_ci vspltw 4, 17, 0 78662306a36Sopenharmony_ci vspltw 5, 17, 1 78762306a36Sopenharmony_ci vspltw 6, 17, 2 78862306a36Sopenharmony_ci vspltw 7, 17, 3 78962306a36Sopenharmony_ci vspltw 8, 18, 0 79062306a36Sopenharmony_ci vspltw 9, 18, 1 79162306a36Sopenharmony_ci vspltw 10, 18, 2 79262306a36Sopenharmony_ci vspltw 11, 18, 3 79362306a36Sopenharmony_ci vspltw 12, 19, 0 79462306a36Sopenharmony_ci vadduwm 12, 12, 30 # increase counter 79562306a36Sopenharmony_ci vspltw 13, 19, 1 79662306a36Sopenharmony_ci vspltw 14, 19, 2 79762306a36Sopenharmony_ci vspltw 15, 19, 3 79862306a36Sopenharmony_ci 79962306a36Sopenharmony_ci.align 5 80062306a36Sopenharmony_ciquarter_loop: 80162306a36Sopenharmony_ci QT_loop_4x 80262306a36Sopenharmony_ci 80362306a36Sopenharmony_ci bdnz quarter_loop 80462306a36Sopenharmony_ci 80562306a36Sopenharmony_ci vadduwm 12, 12, 30 80662306a36Sopenharmony_ci TP_4x 0, 1, 2, 3 80762306a36Sopenharmony_ci TP_4x 4, 5, 6, 7 80862306a36Sopenharmony_ci TP_4x 8, 9, 10, 11 80962306a36Sopenharmony_ci TP_4x 12, 13, 14, 15 81062306a36Sopenharmony_ci 81162306a36Sopenharmony_ci Add_state 0 81262306a36Sopenharmony_ci Write_256 0 81362306a36Sopenharmony_ci addi 14, 14, 256 # offset += 256 81462306a36Sopenharmony_ci addi 15, 15, -256 # len -= 256 81562306a36Sopenharmony_ci 81662306a36Sopenharmony_ci # Update state counter 81762306a36Sopenharmony_ci vspltisw 25, 4 81862306a36Sopenharmony_ci vadduwm 30,
30, 25 81962306a36Sopenharmony_ci 82062306a36Sopenharmony_ci cmpdi 15, 0 82162306a36Sopenharmony_ci beq Out_loop 82262306a36Sopenharmony_ci cmpdi 15, 256 82362306a36Sopenharmony_ci blt Out_loop 82462306a36Sopenharmony_ci 82562306a36Sopenharmony_ci mtctr 8 82662306a36Sopenharmony_ci b Loop_4x 82762306a36Sopenharmony_ci 82862306a36Sopenharmony_ciOut_loop: 82962306a36Sopenharmony_ci RESTORE_REGS 83062306a36Sopenharmony_ci blr 83162306a36Sopenharmony_ci 83262306a36Sopenharmony_ciOut_no_chacha: 83362306a36Sopenharmony_ci li 3, 0 83462306a36Sopenharmony_ci blr 83562306a36Sopenharmony_ciSYM_FUNC_END(chacha_p10le_8x) 83662306a36Sopenharmony_ci 83762306a36Sopenharmony_ciSYM_DATA_START_LOCAL(PERMX) 83862306a36Sopenharmony_ci.align 5 83962306a36Sopenharmony_cipermx: 84062306a36Sopenharmony_ci.long 0x22330011, 0x66774455, 0xaabb8899, 0xeeffccdd 84162306a36Sopenharmony_ci.long 0x11223300, 0x55667744, 0x99aabb88, 0xddeeffcc 84262306a36Sopenharmony_ciSYM_DATA_END(PERMX) 843
# ---- Notes on the code directly above ----
# Loop_4x tail: branches to Out_loop when r15 equals 0 or is below 256
# (r15 is stepped by -256 on every Loop_4x pass); otherwise CTR is
# reloaded from r8 and control goes back to Loop_4x.
# Out_loop: expands RESTORE_REGS and returns with blr.
# Out_no_chacha: puts 0 in r3 and returns with blr, without RESTORE_REGS.
# The branch that targets Out_no_chacha is outside this part of the file.
# PERMX / permx: local data object placed after ".align 5", made of two
# rows of four 32-bit words each. Loop_last forms the address of permx in
# r11, then loads VSR 32+20 from r11 with index 0 and VSR 32+22 from r11
# with index r17.
# NOTE(review): r17 presumably holds 16 so the second load reads row two;
# the register is set outside the visible code - confirm.
# NOTE(review): the two rows look like byte-shuffle selectors for the
# rotate-by-16 and rotate-by-8 steps named in the file header; the
# QT_loop_* macros that consume them are not visible here - confirm.